import copy
import glob
import japanize_matplotlib
import math
import matplotlib as mpl
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import os
import pandas as pd
import random
import sys
from sklearn import linear_model
from sklearn.linear_model import HuberRegressor, LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import sklearn.preprocessing as sp
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
# Enable the notebook setting that displays DataFrames nicely when a cell contains only a variable name
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Declare the shared constants
# Benchmark names; they are used to build the "pprof_<benchmark><class><processes>.csv" file names
benchmarks = ["bt", "cg", "ep", "ft", "is", "lu", "mg", "sp"]
# Benchmark class letters
classes = ["S", "W", "A", "B", "C", "D"]
# Process counts
processes = [1, 2, 4, 8, 16, 32, 64, 128, 256]
# NOTE(review): these two look like module-level counterparts of show_graph's parameters of the same names — confirm they are still needed
fix_process = 4
fix_benchmark_class = "C"
def return_fixed_class(
    BenchMark="bt",
    Processes=[1, 2, 4, 8, 16, 32, 64, 128, 256],
    FixedBenchMarkClass="C",
):
    """Load the call tables of one benchmark at a fixed class, one per process count.

    For every entry of ``Processes`` the file
    ``./csv_files/pprof_<BenchMark><FixedBenchMarkClass><process>.csv`` is read
    when it exists and is not empty; missing or empty files are skipped.

    Returns:
        A list of DataFrames indexed by the ``Name`` column and sorted by that
        index, with the ``#Call`` column renamed to the process count.
    """
    csv_dir = "./csv_files/"
    loaded_frames = []
    for process_count in Processes:
        csv_path = (
            csv_dir
            + "pprof_"
            + BenchMark
            + FixedBenchMarkClass
            + str(process_count)
            + ".csv"
        )
        # Skip measurements that were never taken or produced an empty file.
        if not os.path.exists(csv_path) or os.stat(csv_path).st_size == 0:
            continue
        table = pd.read_csv(csv_path).set_index(["Name"])
        renamed = table.rename(columns={"#Call": process_count})
        loaded_frames.append(renamed.sort_index())
    return loaded_frames
def show_fixed_class_graph(
    BenchMark="bt",
    Processes=[1, 2, 4, 8, 16, 32, 64, 128, 256],
    FixedBenchMarkClass="C",
):
    """Plot, for one benchmark at a fixed class, every function's call count per process count.

    The data comes from ``return_fixed_class``. Nothing is drawn when no CSV
    file could be loaded. Each function gets one line with a randomly chosen
    marker; line colors follow matplotlib's default cycle.
    """
    # Only markers that draw a glyph. The previous pool also contained
    # "None", None and "", so a random pick could silently drop the marker.
    markers = [
        ".",
        ",",
        "o",
        "v",
        "^",
        "<",
        ">",
        "1",
        "2",
        "3",
        "4",
        "8",
        "s",
        "p",
        "*",
        "h",
        "H",
        "+",
        "x",
        "D",
        "d",
        "|",
        "_",
        "$x$",
        "$\\alpha$",
        "$\\beta$",
        "$\\gamma$",
    ]
    fixed_Class = return_fixed_class(
        BenchMark=BenchMark,
        Processes=Processes,
        FixedBenchMarkClass=FixedBenchMarkClass,
    )
    if len(fixed_Class) == 0:
        return
    summary_fixed_Class = pd.concat(fixed_Class, axis=1)
    # Columns are process counts; sort them so the x axis is increasing.
    summary_fixed_Class.sort_index(axis=1, inplace=True)
    x_axes = summary_fixed_Class.columns.tolist()
    # Transpose once so each function name selects its series of call counts.
    calls_per_function = summary_fixed_Class.T
    plt.figure()
    for function_name in summary_fixed_Class.index.tolist():
        plt.plot(
            x_axes,
            calls_per_function[function_name],
            marker=random.choice(markers),
            label=function_name,
        )
    plt.legend()
    plt.title(BenchMark + "_FixedBenchMarkClass=" + FixedBenchMarkClass)
    plt.show()
# 使用例
# show_fixed_class_graph(BenchMark="cg")
def return_fixed_process(
    BenchMark="bt", BenchMarkClasses=["S", "W", "A", "B", "C", "D"], FixedProcess=32
):
    """Load the call tables of one benchmark at a fixed process count, one per class.

    For every entry of ``BenchMarkClasses`` the file
    ``./csv_files/pprof_<BenchMark><class><FixedProcess>.csv`` is read when it
    exists and is not empty; missing or empty files are skipped.

    Returns:
        A list of DataFrames indexed by the ``Name`` column and sorted by that
        index, with the ``#Call`` column renamed to the benchmark class.
    """
    csv_dir = "./csv_files/"
    loaded_frames = []
    for class_letter in BenchMarkClasses:
        csv_path = (
            csv_dir + "pprof_" + BenchMark + class_letter + str(FixedProcess) + ".csv"
        )
        # Skip measurements that were never taken or produced an empty file.
        if not os.path.exists(csv_path) or os.stat(csv_path).st_size == 0:
            continue
        table = pd.read_csv(csv_path).set_index(["Name"])
        renamed = table.rename(columns={"#Call": class_letter})
        loaded_frames.append(renamed.sort_index())
    return loaded_frames
def show_fixed_process_graph(
    BenchMark="bt", BenchMarkClasses=["S", "W", "A", "B", "C", "D"], FixedProcess=32
):
    """Plot, for one benchmark at a fixed process count, every function's call count per class.

    The data comes from ``return_fixed_process``. Nothing is drawn when no CSV
    file could be loaded. Each function gets one line with a randomly chosen
    marker; line colors follow matplotlib's default cycle.
    """
    fixed_process = return_fixed_process(
        BenchMark=BenchMark,
        BenchMarkClasses=BenchMarkClasses,
        FixedProcess=FixedProcess,
    )
    # Only markers that draw a glyph. The previous pool also contained
    # "None", None and "", so a random pick could silently drop the marker.
    markers = [
        ".",
        ",",
        "o",
        "v",
        "^",
        "<",
        ">",
        "1",
        "2",
        "3",
        "4",
        "8",
        "s",
        "p",
        "*",
        "h",
        "H",
        "+",
        "x",
        "D",
        "d",
        "|",
        "_",
        "$x$",
        "$\\alpha$",
        "$\\beta$",
        "$\\gamma$",
    ]
    if len(fixed_process) == 0:
        return
    summary_fixed_process = pd.concat(fixed_process, axis=1)
    x_axes = summary_fixed_process.columns.tolist()
    # Transpose once so each function name selects its series of call counts.
    calls_per_function = summary_fixed_process.T
    plt.figure()
    for function_name in summary_fixed_process.index.tolist():
        plt.plot(
            x_axes,
            calls_per_function[function_name],
            marker=random.choice(markers),
            label=function_name,
        )
    plt.legend()
    plt.title(BenchMark + "_FixedProcess=" + str(FixedProcess))
    plt.show()
# 使用例
# show_fixed_process_graph(BenchMark="cg")
def show_graph(
    BenchMarks=[],
    Processes=[],
    BenchMarkClasses=[],
    fix_process=4,
    fix_benchmark_class="C",
):
    """Draw both the fixed-class and the fixed-process graph for each benchmark.

    When any of the three list arguments equals ``[]`` a message is printed
    and nothing is drawn.
    """
    required_lists = (BenchMarks, Processes, BenchMarkClasses)
    if any(candidate == [] for candidate in required_lists):
        print("関数の引数となっている配列が空です。")
        return
    for bench_mark in BenchMarks:
        show_fixed_class_graph(
            BenchMark=bench_mark,
            Processes=Processes,
            FixedBenchMarkClass=fix_benchmark_class,
        )
        show_fixed_process_graph(
            BenchMark=bench_mark,
            BenchMarkClasses=BenchMarkClasses,
            FixedProcess=fix_process,
        )
# Benchmark names passed to show_graph in the usage example below
bench_marks = ["bt", "cg", "ep", "ft", "is", "lu", "mg", "sp"]
# These two rebind the module-level names declared near the top of the file to the same values
processes = [1, 2, 4, 8, 16, 32, 64, 128, 256]
classes = ["S", "W", "A", "B", "C", "D"]
# 使用例
# show_graph(bench_marks, processes, classes)
def return_summarized_Fixed_dataframe(BenchMark_name="cg", fixed="class"):
    """Collapse functions with identical call counts into one row.

    NOTE: a function with the same name is defined again later in this file;
    at import time that later definition replaces this one.

    Args:
        BenchMark_name: benchmark whose CSV files are loaded.
        fixed: ``"class"`` to load via ``return_fixed_class`` or ``"process"``
            to load via ``return_fixed_process``.

    Returns:
        The concatenated table with duplicate rows dropped. Each remaining row
        is renamed to ``"<kept name>, <other name>, ..."`` listing every
        function whose row is identical to it.

    Raises:
        ValueError: if ``fixed`` is neither ``"class"`` nor ``"process"``.
    """
    if fixed == "class":
        frames = return_fixed_class(BenchMark=BenchMark_name)
    elif fixed == "process":
        frames = return_fixed_process(BenchMark=BenchMark_name)
    else:
        raise ValueError(f'fixed must be "class" or "process", got {fixed!r}')

    def rows_match(left, right):
        # drop_duplicates() treats NaN as equal to NaN, so the name merge has
        # to do the same or names of dropped rows containing NaN get lost.
        return all(
            a == b or (pd.isna(a) and pd.isna(b)) for a, b in zip(left, right)
        )

    summary = pd.concat(frames, axis=1)
    unique_rows = summary.drop_duplicates()
    # Materialise each row once instead of rebuilding lists per comparison.
    all_rows = list(zip(summary.index.values, summary.values.tolist()))
    renames = {}
    for kept_name, kept_row in zip(
        unique_rows.index.values, unique_rows.values.tolist()
    ):
        merged_name = kept_name
        for other_name, other_row in all_rows:
            if other_name != kept_name and rows_match(kept_row, other_row):
                merged_name += f", {other_name}"
        renames[kept_name] = merged_name
    return unique_rows.rename(index=renames)
# 平均絶対パーセント誤差 (MAPE)(Mean Absolute Percent Error (MAPE))を返す関数
# 引数として長さの同じ二つのリストをとる
# 引数l1: 実測値のリスト
# 引数l2: 予測値のリスト
def mape_score(l1, l2):
    """Return the mean absolute percent error (MAPE) in percent.

    Args:
        l1: measured values.
        l2: predicted values; must have the same length as ``l1``.

    Returns:
        ``mean(|l1[i] - l2[i]| / l1[i]) * 100``. When the lengths differ a
        message is printed and ``-1`` is returned.
    """
    sample_count = len(l1)
    if sample_count != len(l2):
        print("引数のリストの長さが異なります", end=", ")
        return -1
    # Accumulate left to right from 0, element by element.
    total_ratio = sum(
        abs((l1[k] - l2[k]) / l1[k]) for k in range(sample_count)
    )
    return total_ratio / sample_count * 100
# 使用例:mape_score([1,2,3,4], [4,3,2,1])
# Notebook check of mape_score's return type; outside a notebook this expression has no visible effect.
type(mape_score([1, 2, 3, 4], [4, 3, 2, 1]))
# NOTE(review): the bare `float` below looks like pasted notebook output; as a statement it does nothing.
float
def split_by_ratio(base_list: list, test_ratio: float):
    """Split a sequence into a leading training part and a trailing test part.

    Args:
        base_list: sequence to split (anything supporting ``len`` and slicing).
        test_ratio: fraction of the elements assigned to the test part; it is
            converted with ``float`` so numeric strings are accepted.

    Returns:
        ``(train, test)`` where ``test`` holds the last
        ``floor(len(base_list) * test_ratio)`` elements. When that count is 0
        the whole input is returned as the training part and ``test`` is
        ``[]``.
    """
    test_index = math.floor(len(base_list) * float(test_ratio))
    if test_index == 0:
        # ``seq[:-0]`` is empty and ``seq[-0:]`` is the whole sequence, so
        # slicing here would hand every sample to the test side.
        return base_list, []
    return base_list[:-test_index], base_list[-test_index:]
class ModelBase:
    """Common state and placeholder scoring methods for the regression models.

    The constructor reshapes the raw data into column vectors, splits it into
    training and test parts with ``split_by_ratio`` and prepares the x values
    used to draw the fitted curve. The ``calc_*`` methods here only reset the
    corresponding attribute to 0; subclasses override them.
    """

    def __init__(
        self,
        raw_x,
        raw_y,
        benchmark_name="benchmark_name",
        function_name="function_name",
        test_ratio=0.3,
    ):
        """Store the data, split it and initialise every score attribute to 0.

        Args:
            raw_x: x values; reshaped to a column vector.
            raw_y: y values; reshaped to a column vector.
            benchmark_name: label used in the diagnostic message.
            function_name: label stored on the instance.
            test_ratio: fraction of the trailing samples used for testing.
        """
        self.benchmark_name = benchmark_name
        self.function_name = function_name
        self.xlabel = "実行時のプロセス数"
        self.ylabel = "プロセスごとの関数コール回数"
        self.raw_x = np.reshape(raw_x, (-1, 1))
        self.raw_y = np.reshape(raw_y, (-1, 1))
        self.train_x, self.test_x = split_by_ratio(self.raw_x, test_ratio)
        self.train_y, self.test_y = split_by_ratio(self.raw_y, test_ratio)
        # NOTE(review): the diagnostic fires when the train and test parts
        # have the same length — confirm that this is the intended condition.
        if len(self.train_x) == len(self.test_x) or len(self.train_y) == len(
            self.test_y
        ):
            print(f"学習用とテスト用にデータを分割するのに問題が生じています。@{benchmark_name}")
            print(f"len(self.train_x) == {len(self.train_x)}")
            print(f"len(self.train_y) == {len(self.train_y)}")
            print(f"len(self.test_x) == {len(self.test_x)}")
            print(f"len(self.test_y) == {len(self.test_y)}")
        # x values from 0.1 up to (not including) the largest raw x in steps
        # of 0.1, as a column vector; used to draw the fitted curve.
        self.x_model_line = np.reshape(
            np.arange(start=0.1, stop=self.raw_x.max(), step=0.1), (-1, 1)
        )
        self.y_model_line = 0
        self.lr = 0
        # Every score starts at 0 so reading one before its calc_* method ran
        # does not raise AttributeError (previously only r2_score was set).
        self.r2_score = 0
        self.mae_score = 0
        self.mse_score = 0
        self.rmse_score = 0
        self.mape_score = 0
        self.mape_score_InTrain = 0

    def calc_lr(self):
        """Placeholder: reset the regressor attribute to 0."""
        self.lr = 0

    def calc_r2_score(self):
        """Placeholder: reset ``r2_score`` to 0."""
        self.r2_score = 0

    def calc_mae_score(self):
        """Placeholder: reset ``mae_score`` to 0."""
        self.mae_score = 0

    def calc_mse_score(self):
        """Placeholder: reset ``mse_score`` to 0."""
        self.mse_score = 0

    def calc_rmse_score(self):
        """Placeholder: reset ``rmse_score`` to 0."""
        self.rmse_score = 0

    def calc_mape_score(self):
        """Placeholder: reset ``mape_score`` to 0."""
        self.mape_score = 0

    def plot_graph(self):
        """Plot the raw data as a red line and show the figure."""
        plt.figure()
        plt.plot(self.raw_x, self.raw_y, color="red")
        plt.show()
class ModelLin(ModelBase):
    """Linear model: an ordinary least-squares line fitted to (x, y)."""

    def calc_lr(self, AllData=False):
        """Fit a ``LinearRegression`` on the training split (``AllData`` is not used)."""
        self.lr = LinearRegression()
        self.lr.fit(self.train_x, self.train_y)

    def _predict_on_test(self):
        # Predictions for the held-out samples, shared by the test-set scores.
        return self.lr.predict(self.test_x)

    def calc_r2_score(self):
        """Store the R^2 score on the test split."""
        self.r2_score = r2_score(self.test_y, self._predict_on_test())

    def calc_mae_score(self):
        """Store the mean absolute error on the test split."""
        self.mae_score = mean_absolute_error(self.test_y, self._predict_on_test())

    def calc_mse_score(self):
        """Store the mean squared error on the test split."""
        self.mse_score = mean_squared_error(self.test_y, self._predict_on_test())

    def calc_rmse_score(self):
        """Recompute the MSE and store its square root."""
        self.calc_mse_score()
        self.rmse_score = np.sqrt(self.mse_score)

    def calc_mape_score(self):
        """Store the MAPE on the test split as a float."""
        self.mape_score = float(mape_score(self.test_y, self._predict_on_test()))

    def calc_mape_score_InTrain(self):
        """Store the MAPE on the training split as a float."""
        fitted_train_y = self.lr.predict(self.train_x)
        self.mape_score_InTrain = float(mape_score(self.train_y, fitted_train_y))

    def plot_graph(self, save=False, fileName="graph.pdf"):
        """Scatter the raw data, draw the fitted line in red and optionally save the figure."""
        plt.figure()
        plt.scatter(self.raw_x, self.raw_y)
        self.y_model_line = self.lr.predict(self.x_model_line)
        plt.plot(self.x_model_line, self.y_model_line, color="red")
        plt.xlabel(self.xlabel)
        plt.ylabel(self.ylabel)
        if save:
            plt.savefig(fileName)

    def predict(self, num):
        """Return the regressor's prediction for ``num``."""
        return self.lr.predict(num)

    def ModelName(self):
        """Return the model's name."""
        return "ModelLin"
def inverter_log10_func(x):
    """Return 10 raised to ``x``; used as the inverse of ``log10``."""
    return pow(10, x)
class ModelLog10(ModelBase):
    """Log-log model: a straight line fitted between log10(x) and log10(y).

    ``calc_lr`` must run before any other method because it creates the
    transformer and the regressor they rely on.
    """

    def calc_lr(self, AllData=False):
        """Fit a ``LinearRegression`` on the log10 of the training split (``AllData`` is not used)."""
        self.transformer_log10 = sp.FunctionTransformer(
            func=np.log10, inverse_func=inverter_log10_func
        )
        x_train_log10 = self.transformer_log10.transform(self.train_x)
        y_train_log10 = self.transformer_log10.transform(self.train_y)
        self.lr = LinearRegression()
        self.lr.fit(x_train_log10, y_train_log10)

    # The r2/mae/mse scores below are evaluated on the test split, matching
    # ModelLin and this class's own calc_mape_score. They previously used the
    # training split; training-set figures belong to the *_InTrain method.

    def calc_r2_score(self):
        """Store the R^2 score on the test split (in the original y scale)."""
        self.r2_score = r2_score(self.test_y, self.predict(self.test_x))

    def calc_mae_score(self):
        """Store the mean absolute error on the test split."""
        self.mae_score = mean_absolute_error(self.test_y, self.predict(self.test_x))

    def calc_mse_score(self):
        """Store the mean squared error on the test split."""
        self.mse_score = mean_squared_error(self.test_y, self.predict(self.test_x))

    def calc_rmse_score(self):
        """Recompute the MSE and store its square root."""
        self.calc_mse_score()
        self.rmse_score = np.sqrt(self.mse_score)

    def calc_mape_score(self):
        """Store the MAPE on the test split as a float."""
        self.mape_score = float(mape_score(self.test_y, self.predict(self.test_x)))

    def calc_mape_score_InTrain(self):
        """Store the MAPE on the training split as a float."""
        self.mape_score_InTrain = float(
            mape_score(self.train_y, self.predict(self.train_x))
        )

    def plot_graph(self, save=False, fileName="graph.pdf"):
        """Scatter the raw data, draw the fitted curve in red and optionally save the figure."""
        plt.figure()
        plt.scatter(self.raw_x, self.raw_y)
        self.y_model_line = self.predict(self.x_model_line)
        plt.plot(self.x_model_line, self.y_model_line, color="red")
        plt.xlabel(self.xlabel)
        plt.ylabel(self.ylabel)
        if save:
            plt.savefig(fileName)

    def predict(self, num):
        """Predict y for ``num``: log10-transform, apply the line, then invert with 10**."""
        num_log10 = self.transformer_log10.transform(num)
        predicted_log10 = self.lr.predict(num_log10)
        return self.transformer_log10.inverse_transform(predicted_log10)

    def return_coef_(self):
        """Return the fitted regressor's ``coef_``."""
        return self.lr.coef_

    def return_intercept_(self):
        """Return the fitted regressor's ``intercept_``."""
        return self.lr.intercept_

    def ModelName(self):
        """Return the model's name."""
        return "ModelLog10"
# Inverse proportion
def ip_func(x):
    """Return the reciprocal of ``x`` (the transform is its own inverse)."""
    reciprocal = 1 / x
    return reciprocal
class ModelIP(ModelBase):
    """Inverse-proportion model: a straight line fitted between x and 1/y.

    ``calc_lr`` must run before any other method because it creates the
    transformer and the regressor they rely on.
    """

    def calc_lr(self, AllData=False):
        """Fit a ``LinearRegression`` of 1/y on x over the training split (``AllData`` is not used)."""
        self.transformer_ip = sp.FunctionTransformer(func=ip_func, inverse_func=ip_func)
        y_train_ip = self.transformer_ip.transform(self.train_y)
        self.lr = LinearRegression()
        self.lr.fit(self.train_x, y_train_ip)

    # The r2/mae/mse scores below are evaluated on the test split, matching
    # ModelLin and this class's own calc_mape_score. They previously used the
    # training split; training-set figures belong to the *_InTrain method.

    def calc_r2_score(self):
        """Store the R^2 score on the test split (in the original y scale)."""
        self.r2_score = r2_score(self.test_y, self.predict(self.test_x))

    def calc_mae_score(self):
        """Store the mean absolute error on the test split."""
        self.mae_score = mean_absolute_error(self.test_y, self.predict(self.test_x))

    def calc_mse_score(self):
        """Store the mean squared error on the test split."""
        self.mse_score = mean_squared_error(self.test_y, self.predict(self.test_x))

    def calc_rmse_score(self):
        """Recompute the MSE and store its square root."""
        self.calc_mse_score()
        self.rmse_score = np.sqrt(self.mse_score)

    def calc_mape_score(self):
        """Store the MAPE on the test split as a float."""
        self.mape_score = float(mape_score(self.test_y, self.predict(self.test_x)))

    def calc_mape_score_InTrain(self):
        """Store the MAPE on the training split as a float."""
        self.mape_score_InTrain = float(
            mape_score(self.train_y, self.predict(self.train_x))
        )

    def plot_graph(self, save=False, fileName="graph.pdf"):
        """Scatter the raw data, draw the fitted curve in red and optionally save the figure."""
        plt.figure()
        plt.scatter(self.raw_x, self.raw_y)
        self.y_model_line = self.predict(self.x_model_line)
        plt.plot(self.x_model_line, self.y_model_line, color="red")
        plt.xlabel(self.xlabel)
        plt.ylabel(self.ylabel)
        if save:
            plt.savefig(fileName)

    def predict(self, num):
        """Predict y for ``num``: apply the line to get 1/y, then take the reciprocal."""
        predicted_ip = self.lr.predict(num)
        return self.transformer_ip.inverse_transform(predicted_ip)

    def ModelName(self):
        """Return the model's name."""
        return "ModelIP"
class ModelBranch(ModelBase):
    """Two-segment linear model that branches at the largest training y value.

    ``calc_lr`` finds the index ``t`` of the first maximum of ``train_y`` and
    fits ``lr1`` on the samples before ``t`` and ``lr2`` on the samples from
    ``t`` onwards. When ``t`` is the first or the last training index both
    regressors are fitted on the whole training split.
    """

    def _is_single_segment(self):
        # True when the branch point sits at either end of the training data,
        # in which case lr1 and lr2 are both fitted on all of it.
        return self.t == 0 or self.t == len(self.train_y) - 1

    def calc_lr(self, AllData=False):
        """Locate the branch index and fit the two regressors (``AllData`` is not used)."""
        # train_y is a column vector, so the flat argmax is the row index of
        # the first occurrence of the maximum.
        self.t = int(np.argmax(self.train_y))
        t = self.t
        if self._is_single_segment():
            self.lr1 = LinearRegression()
            self.lr1.fit(self.train_x, self.train_y)
            self.lr2 = LinearRegression()
            self.lr2.fit(self.train_x, self.train_y)
        else:
            self.x_train_1 = self.train_x[:t]
            self.x_train_2 = self.train_x[t:]
            self.y_train_1 = self.train_y[:t]
            self.y_train_2 = self.train_y[t:]
            self.lr1 = LinearRegression()
            self.lr1.fit(self.x_train_1, self.y_train_1)
            self.lr2 = LinearRegression()
            self.lr2.fit(self.x_train_2, self.y_train_2)

    def calc_mape_score(self):
        """Store the MAPE on the test split as a float.

        In the single-segment case ``lr1`` is used (``lr2`` is fitted on the
        same data); otherwise the second segment's regressor is used.
        """
        regressor = self.lr1 if self._is_single_segment() else self.lr2
        test_y_predicted = regressor.predict(self.test_x)
        self.mape_score = float(mape_score(self.test_y, test_y_predicted))

    def calc_mape_score_InTrain(self):
        """Store the MAPE on the training split as a float.

        With two segments the stored value is the plain average of the two
        per-segment MAPE values.
        """
        if self._is_single_segment():
            train_y_predicted = self.lr1.predict(self.train_x)
            self.mape_score_InTrain = float(mape_score(self.train_y, train_y_predicted))
        else:
            train_y_predicted_1 = self.lr1.predict(self.x_train_1)
            train_y_predicted_2 = self.lr2.predict(self.x_train_2)
            mape_1 = float(mape_score(self.y_train_1, train_y_predicted_1))
            mape_2 = float(mape_score(self.y_train_2, train_y_predicted_2))
            # NOTE(review): unweighted mean of the two segment scores; this
            # differs from the MAPE over all training points when the
            # segments have different sizes — confirm this is intended.
            self.mape_score_InTrain = (mape_1 + mape_2) / 2

    def plot_graph(self, save=False, fileName="graph.pdf"):
        """Scatter the raw data, draw the fitted segment(s) in red and optionally save the figure."""
        plt.figure()
        plt.scatter(self.raw_x, self.raw_y)
        if self._is_single_segment():
            # This branch used to call self.lr.predict(x_model_line): self.lr
            # is never fitted by this class and x_model_line was not yet
            # defined, so it could not run. lr1 holds the single fitted line.
            y_model_line = self.lr1.predict(self.x_model_line)
            plt.plot(self.x_model_line, y_model_line, color="red")
        else:
            # Split the curve's x values at the first one beyond the branch
            # point; fall back to the last index when none is beyond it.
            x_model_line = self.x_model_line
            branch_x = self.train_x[self.t]
            t_in_model_line = 0
            for i in range(len(x_model_line)):
                t_in_model_line = i
                if branch_x < x_model_line[i]:
                    break
            x_model_line1 = x_model_line[:t_in_model_line]
            x_model_line2 = x_model_line[t_in_model_line:]
            # predict() rejects empty input, so only draw non-empty segments.
            if len(x_model_line1) > 0:
                plt.plot(x_model_line1, self.lr1.predict(x_model_line1), color="red")
            if len(x_model_line2) > 0:
                plt.plot(x_model_line2, self.lr2.predict(x_model_line2), color="red")
        plt.xlabel(self.xlabel)
        plt.ylabel(self.ylabel)
        if save:
            plt.savefig(fileName)

    def predict(self, num):
        """Return the second segment's prediction for ``num`` regardless of where ``num`` lies."""
        return self.lr2.predict(num)

    def ModelName(self):
        """Return the model's name."""
        return "ModelBranch"
def return_dict_summary_fixed(benchmark_name="cg", fixed="class"):
    """Return one benchmark's call counts as a dictionary of NumPy arrays.

    With ``fixed == "class"`` the tables come from ``return_fixed_class`` and
    the column labels are stored under ``"processes"``; for any other value
    they come from ``return_fixed_process`` and are stored under ``"class"``.
    Every row label (function name) maps to that row's values.
    """
    by_class = fixed == "class"
    if by_class:
        frames = return_fixed_class(BenchMark=benchmark_name)
    else:
        frames = return_fixed_process(BenchMark=benchmark_name)
    summary = pd.concat(frames, axis=1)
    column_labels = summary.columns.to_numpy()
    row_labels = summary.index.to_numpy()
    result = {("processes" if by_class else "class"): column_labels}
    transposed = summary.T
    for function_name in row_labels:
        result[function_name] = transposed[function_name].to_numpy()
    return result
# NaNが入った引数のリストをNaNのみを0にして返す関数
def return_non_NaN_list(target_list):
    """Replace every NaN in ``target_list`` with 0 in place and return the same object."""
    for position, value in enumerate(target_list):
        if math.isnan(value):
            target_list[position] = 0
    return target_list
# NaNが入ったリストが引数として渡されるとTrueを返す関数
def does_include_nan(target_list):
    """Return True when at least one element of ``target_list`` is NaN."""
    return any(
        math.isnan(target_list[position]) for position in range(len(target_list))
    )
# 線形・対数・反比例モデルでフィッティングを行い、MAPE値をまとめたCSVファイルを作成する関数
def generateScoreTable(benchmark_name="cg"):
    """Fit the linear, log and inverse models per function and save their test MAPE values.

    The call counts come from ``return_dict_summary_fixed`` with the class
    fixed. Functions whose data contain NaN are skipped. The table is written
    to ``./tmp_GenerateScoreTable/<benchmark_name>.csv`` with one row per
    function and the columns ``x mape``, ``logx mape`` and ``1/x mape``.
    """
    dict_summary_fixed_class = return_dict_summary_fixed(
        benchmark_name=benchmark_name, fixed="class"
    )
    raw_x = dict_summary_fixed_class["processes"]
    score_rows = []
    for content, raw_y in dict_summary_fixed_class.items():
        if content == "processes" or does_include_nan(raw_y):
            continue
        row = [content]
        # Same order as the output columns: linear, log, inverse.
        for model_class in (ModelLin, ModelLog10, ModelIP):
            model = model_class(raw_x, raw_y, benchmark_name, content)
            model.calc_lr()
            # Only the MAPE ends up in the table; the other scores were
            # computed on objects that are discarded, so they are not needed.
            model.calc_mape_score()
            row.append(model.mape_score)
        score_rows.append(row)
    # Passing the column names to the constructor keeps this working when
    # every function was skipped; assigning them afterwards fails on an
    # empty frame.
    df_ScoreTable = pd.DataFrame(
        score_rows, columns=["", "x mape", "logx mape", "1/x mape"]
    )
    df_ScoreTable.set_index("", inplace=True)
    df_ScoreTable.to_csv("./tmp_GenerateScoreTable/" + benchmark_name + ".csv")
# 行の内容が同じものをまとめ、行タイトルに重複した行タイトルがまとめられた、データフレームが返される関数
def return_summarized_Fixed_dataframe(BenchMark_name="cg", fixed="class"):
    """Collapse functions with identical call counts into one row.

    Args:
        BenchMark_name: benchmark whose CSV files are loaded.
        fixed: ``"class"`` to load via ``return_fixed_class`` or ``"process"``
            to load via ``return_fixed_process``.

    Returns:
        The concatenated table with duplicate rows dropped. Each remaining row
        is renamed to ``"<kept name>, <other name>, ..."`` listing every
        function whose row is identical to it.

    Raises:
        ValueError: if ``fixed`` is neither ``"class"`` nor ``"process"``.
    """
    if fixed == "class":
        frames = return_fixed_class(BenchMark=BenchMark_name)
    elif fixed == "process":
        frames = return_fixed_process(BenchMark=BenchMark_name)
    else:
        raise ValueError(f'fixed must be "class" or "process", got {fixed!r}')

    def rows_match(left, right):
        # drop_duplicates() treats NaN as equal to NaN, so the name merge has
        # to do the same or names of dropped rows containing NaN get lost.
        return all(
            a == b or (pd.isna(a) and pd.isna(b)) for a, b in zip(left, right)
        )

    summary = pd.concat(frames, axis=1)
    unique_rows = summary.drop_duplicates()
    # Materialise each row once instead of rebuilding lists per comparison.
    all_rows = list(zip(summary.index.values, summary.values.tolist()))
    renames = {}
    for kept_name, kept_row in zip(
        unique_rows.index.values, unique_rows.values.tolist()
    ):
        merged_name = kept_name
        for other_name, other_row in all_rows:
            if other_name != kept_name and rows_match(kept_row, other_row):
                merged_name += f", {other_name}"
        renames[kept_name] = merged_name
    return unique_rows.rename(index=renames)
# *_excludeBTSP and *_onlyBTSP hold the process counts collected for the respective benchmarks:
# *_onlyBTSP is used for "bt" and "sp", *_excludeBTSP for every other benchmark
processes_excludeBTSP = [1, 2, 4, 8, 16, 32, 64, 128, 256]
processes_onlyBTSP = [1, 4, 16, 64, 256]
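# Takes a DataFrame whose columns are process counts or benchmark classes and whose rows are function names.
# Returns a dictionary of the following form:
# rowData: list of process counts, or list of benchmark classes (taken from the argument's columns)
# <function name>: list of call counts
# with one entry per function name in the index.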
def return_dict_Data(DataFrame):
    """Convert a call-count table into a dictionary of NumPy arrays.

    The column labels are stored under ``"rowData"``; every row label maps to
    that row's values.
    """
    column_labels = DataFrame.columns.to_numpy()
    row_labels = DataFrame.index.to_numpy()
    transposed = DataFrame.T
    return_dict = {"rowData": column_labels}
    return_dict.update(
        (row_label, transposed[row_label].to_numpy()) for row_label in row_labels
    )
    return return_dict
### 引数はx軸となる値のリスト, y軸となる値のリスト, 関数名の文字列, 訓練データでMAPEを算出するかどうかの真偽
# 返り値は次のようなリスト
# [<関数名の文字列>, <線形モデルのMAPE値>, <対数モデルのMAPE値>, <反比例モデルのMAPE値>, <分岐モデルのMAPE値>]
def return_Mape_row_list(
    x: list, y: list, function_name: str, test_ratio=0.3, train=False
):
    """Fit the four models on (x, y) and return their MAPE values as one table row.

    Args:
        x: x-axis values.
        y: y-axis values.
        function_name: label placed first in the returned row.
        test_ratio: fraction of the trailing samples held out for testing.
        train: when true the MAPE is measured on the training split,
            otherwise on the test split.

    Returns:
        ``[function_name, <linear MAPE>, <log10 MAPE>, <inverse MAPE>, <branch MAPE>]``.
    """
    scores = []
    for model_class in (ModelLin, ModelLog10, ModelIP, ModelBranch):
        model = model_class(x, y, test_ratio=test_ratio)
        if test_ratio == 0:
            # Nothing is held out: train on every sample.
            model.train_x = model.raw_x
            model.train_y = model.raw_y
        model.calc_lr()
        # Compute only the requested score. Always computing the test-set
        # MAPE failed on the empty test split that test_ratio == 0 produces,
        # even when only the training score was wanted.
        if train:
            model.calc_mape_score_InTrain()
            scores.append(model.mape_score_InTrain)
        else:
            model.calc_mape_score()
            scores.append(model.mape_score)
    return [function_name, *scores]
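# Arguments: the return value of return_dict_Data(), the fraction of the data used for testing, and whether the MAPE is computed on the training data
# Returns a DataFrame whose rows are function names and whose columns are model names, with MAPE values as elements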
def return_MapeTable_per_benchmark(dict_data: dict, test_ratio, train=False):
    """Build the per-function MAPE table of one benchmark.

    Args:
        dict_data: dictionary with the x values under ``"rowData"`` and one
            entry of y values per function name.
        test_ratio: fraction of the samples held out for testing.
        train: forwarded to ``return_Mape_row_list``.

    Returns:
        A DataFrame indexed by ``"function name"`` with one MAPE column per
        model. Functions whose values contain NaN are left out.
    """
    # x values shared by every function (process counts or benchmark classes).
    x_values = dict_data["rowData"]
    function_names = [key for key in dict_data.keys() if key != "rowData"]
    table_rows = [
        return_Mape_row_list(
            x=x_values,
            y=dict_data[name],
            function_name=name,
            test_ratio=test_ratio,
            train=train,
        )
        for name in function_names
        if not does_include_nan(dict_data[name])
    ]
    return_df = pd.DataFrame(table_rows)
    return_df.columns = [
        "function name",
        "Linear model",
        "Log10 model",
        "Inverse model",
        "Branch model",
    ]
    return return_df.set_index("function name")
# 構造体的に利用可能なクラス MapeData
# 各ベンチマークの各モデルごとに作成される。
# 要素として、割合, 最大値, 最小値 がある。
class MapeData:
    """Plain record of how often one model was the best, with its MAPE range.

    Attributes are public and filled in by the caller:
    ``ratio`` (percentage), ``max`` / ``min`` (NaN until a value is recorded)
    and ``appearance`` (count).
    """

    def __init__(self):
        self.ratio = 0
        self.max = np.nan
        self.min = np.nan
        self.appearance = 0

    def printData(self):
        """Print ``<ratio>(<min>, <max>)``."""
        print(f"{self.ratio}({self.min}, {self.max})")

    def return_Data(self):
        """Return ``"<ratio>%(<min>, <max>)"``, or ``"<ratio>%(NoData)"`` while ``min`` is NaN."""
        # np.isnan instead of ``is np.nan``: the identity test only matches
        # the np.nan object itself, not any other NaN value.
        if np.isnan(self.min):
            max_min = "(NoData)"
        else:
            max_min = f"({self.min}, {self.max})"
        return f"{self.ratio}%{max_min}"
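# Arguments: the return value of return_MapeTable_per_benchmark() and the benchmark name
# Returns a list of the following form
# [<MapeData for the linear model>, <MapeData for the log model>, <MapeData for the inverse model>, <MapeData for the branch model>, <benchmark name>]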
def return_MapeTable_row(MapeDataframe_detail, benchmark_name: str):
    """Summarise which model has the lowest MAPE for each function of a benchmark.

    Args:
        MapeDataframe_detail: DataFrame with one row per function and one
            MAPE column per model (linear, log, inverse, branch order).
        benchmark_name: label appended as the last element of the result.

    Returns:
        ``[MapeLin, MapeLog, MapeIP, MapeBr, benchmark_name]``. For each
        model, ``appearance`` counts the functions where it has the lowest
        MAPE, ``min`` / ``max`` are the range of those MAPE values truncated
        to one decimal, and ``ratio`` is the integer percentage of
        appearances. The first model's ratio absorbs the rounding remainder
        so the ratios add up to 100. With no functions every ratio stays 0.
    """
    model_stats = [MapeData(), MapeData(), MapeData(), MapeData()]
    for function_name in MapeDataframe_detail.index.to_numpy():
        scores = MapeDataframe_detail.loc[function_name].to_list()
        best_score = min(scores)
        best_model = model_stats[scores.index(best_score)]
        # Truncate to one decimal place.
        truncated = int(best_score * 10) / 10
        best_model.appearance += 1
        if np.isnan(best_model.max):
            # First value recorded for this model.
            best_model.max = truncated
            best_model.min = truncated
        else:
            # Track the range on the truncated values that are stored.
            best_model.min = min(best_model.min, truncated)
            best_model.max = max(best_model.max, truncated)
    total_appearances = sum(stats.appearance for stats in model_stats)
    # Without any function there is nothing to apportion; dividing would
    # raise ZeroDivisionError.
    if total_appearances > 0:
        for stats in model_stats:
            stats.ratio = int(100 * stats.appearance / total_appearances)
        # Make the ratios add up to exactly 100.
        model_stats[0].ratio = 100 - sum(stats.ratio for stats in model_stats[1:])
    return [*model_stats, benchmark_name]
def save_MapeTable(MapeTable, suffix=""):
    """Write a table of MapeData-like cells to CSV as text.

    Every cell of a copy of ``MapeTable`` is replaced by the result of its
    ``return_Data()`` method; the copy is saved to
    ``./tmp_GenerateResources/MapeTable_<suffix>.csv``. The argument itself
    is not modified.
    """
    rendered = MapeTable.copy()
    column_total = len(MapeTable.columns.to_numpy())
    row_total = len(MapeTable.index.to_numpy())
    for column_pos in range(column_total):
        for row_pos in range(row_total):
            cell = rendered.iat[row_pos, column_pos]
            rendered.iat[row_pos, column_pos] = cell.return_Data()
    rendered.to_csv(f"./tmp_GenerateResources/MapeTable_{str(suffix)}.csv")
### 引数はx軸となる値のリスト, y軸となる値のリスト, 関数名の文字列
# 返り値は次のようなリスト
# [<関数名の文字列>, <線形モデル>, <対数モデル>, <反比例モデル>, <分岐モデル>]
def return_Model_row_list(
    x: list, y: list, function_name: str, test_ratio=0.3, train=False
):
    """Fit the four models on (x, y) and return the fitted model objects as one row.

    Each model is constructed, fitted and scored in turn: the MAPE is
    computed on the training split when ``train`` is true, otherwise on the
    test split.

    Returns:
        ``[function_name, <ModelLin>, <ModelLog10>, <ModelIP>, <ModelBranch>]``.
    """
    fitted_models = []
    for model_class in (ModelLin, ModelLog10, ModelIP, ModelBranch):
        model = model_class(x, y, test_ratio=test_ratio)
        model.calc_lr()
        if train:
            model.calc_mape_score_InTrain()
        else:
            model.calc_mape_score()
        fitted_models.append(model)
    return [function_name] + fitted_models
# Arguments: list of benchmarks to process, list of test ratios to evaluate, benchmark class to fix
# No return value
# Running it writes files named "<benchmark name>_FixedClassTrain_<test ratio>.csv" into "./tmp_GenerateResources"
def GenerateMapeTableFixClass(
    Benchmarks=["cg"], TestRatios=["0.3"], FixBenchmarksClass="C"
):
    """Write one training-MAPE table per benchmark and test ratio, with the class fixed.

    For every test ratio the resulting train/test split of both process-count
    lists is printed. Then, for every benchmark, the call tables are loaded,
    the four models are fitted per function and the training MAPE table is
    saved to
    ``./tmp_GenerateResources/<benchmark>_FixedClassTrain_<test_ratio>.csv``.

    Args:
        Benchmarks: benchmark names to process.
        TestRatios: test ratios to evaluate; each appears in the file name.
        FixBenchmarksClass: benchmark class that is held fixed.
    """
    for test_ratio in TestRatios:
        print(f"test_ratio={test_ratio}")
        print(
            f"train_list, test_list = {split_by_ratio(processes_excludeBTSP, test_ratio)} on processes_excludeBTSP"
        )
        print(
            f"train_list, test_list = {split_by_ratio(processes_onlyBTSP, test_ratio)} on processes_onlyBTSP"
        )
        print("\n")
        for benchmark_name in Benchmarks:
            if benchmark_name in ("bt", "sp"):
                process_counts = processes_onlyBTSP
            else:
                process_counts = processes_excludeBTSP
            # Pass the list chosen above. It used to be computed and then
            # ignored in favour of the module-level ``processes``.
            frames = return_fixed_class(
                BenchMark=benchmark_name,
                Processes=process_counts,
                FixedBenchMarkClass=FixBenchmarksClass,
            )
            dict_data = return_dict_Data(pd.concat(frames, axis=1))
            MapeTable_per_benchmark = return_MapeTable_per_benchmark(
                dict_data, test_ratio=test_ratio, train=True
            )
            MapeTable_per_benchmark.to_csv(
                f"./tmp_GenerateResources/{benchmark_name}_FixedClassTrain_{test_ratio}.csv"
            )
# 使用例
# GenerateMapeTableFixClass(Benchmarks=["cg", "lu"], TestRatios=[0.3, 0.7], FixBenchmarksClass="C")
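# Arguments: list of benchmarks to process, list of benchmark classes, the numeric x values standing for those classes, list of test ratios to evaluate, process count to fix
# No return value
# Running it writes files named "<benchmark name>_FixedProcessTrain_<test ratio>.csv" into "./tmp_GenerateResources"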
def GenerateMapeTableFixProcess(
    Benchmarks=["cg"],
    BenchmarkClasses=["A", "B", "C", "D"],
    BenchmarkClasses_on_num=[1, 4, 16, 256],
    TestRatios=["0.3"],
    FixProcess=64,
):
    """Write one training-MAPE table per benchmark and test ratio, with the process count fixed.

    For every test ratio the resulting train/test split of the class list is
    printed. Then, for every benchmark, the call tables are loaded, the class
    letters are replaced by ``BenchmarkClasses_on_num`` as x values, the four
    models are fitted per function and the training MAPE table is saved to
    ``./tmp_GenerateResources/<benchmark>_FixedProcessTrain_<test_ratio>.csv``.
    A benchmark whose table cannot be computed is reported and skipped.

    Args:
        Benchmarks: benchmark names to process.
        BenchmarkClasses: class letters whose CSV files are loaded.
        BenchmarkClasses_on_num: numeric x values used in place of the letters.
        TestRatios: test ratios to evaluate; each appears in the file name.
        FixProcess: process count that is held fixed.
    """
    for test_ratio in TestRatios:
        print(f"test_ratio={test_ratio}")
        print(
            f"train_list, test_list = {split_by_ratio(BenchmarkClasses, test_ratio)} on BenchmarkClasses"
        )
        print("\n")
        # Iterate the ``Benchmarks`` argument. The loop used to be sized and
        # the output file named by the module-level ``benchmarks`` list, which
        # raised IndexError for shorter arguments (including the default).
        for benchmark_name in Benchmarks:
            frames = return_fixed_process(
                BenchMark=benchmark_name,
                BenchMarkClasses=BenchmarkClasses,
                FixedProcess=FixProcess,
            )
            dict_data = return_dict_Data(pd.concat(frames, axis=1))
            dict_data["rowData"] = BenchmarkClasses_on_num
            try:
                MapeTable_per_benchmark = return_MapeTable_per_benchmark(
                    dict_data, test_ratio=test_ratio, train=True
                )
            except Exception as error:
                # Still best-effort, but no longer swallows KeyboardInterrupt
                # or SystemExit, and the cause is reported.
                print(f"MAPEを算出するのに問題発生@{benchmark_name}")
                print(f"  {type(error).__name__}: {error}")
                continue
            MapeTable_per_benchmark.to_csv(
                f"./tmp_GenerateResources/{benchmark_name}_FixedProcessTrain_{test_ratio}.csv"
            )
def ConvertBencharkClass_inNPB(Alphabet: str):
    """Map a benchmark class letter to its number: A→1, B→4, C→16, D→256.

    Returns ``False`` for any other value.
    """
    number_by_letter = {"A": 1, "B": 4, "C": 16, "D": 256}
    return number_by_letter.get(Alphabet, False)
def ConvertBencharkClass_inNPB_fromNum(number):
    """Map a number back to its benchmark class letter: 1→A, 4→B, 16→C, 256→D.

    ``number`` is converted with ``int`` first. Returns ``False`` for any
    other value.
    """
    letter_by_number = {1: "A", 4: "B", 16: "C", 256: "D"}
    return letter_by_number.get(int(number), False)
def ConvertBenchmarkClasses(InputList=["A", "B", "C", "D"]):
    """Return a new list with ``ConvertBencharkClass_inNPB`` applied to every element of ``InputList``."""
    return [ConvertBencharkClass_inNPB(class_letter) for class_letter in InputList]
def return_OptimalProcessesList(BenchmarkName="cg"):
    """Return the process-count list for a benchmark.

    ``"bt"`` and ``"sp"`` get ``processes_onlyBTSP``; every other name gets
    ``processes_excludeBTSP``. The module-level list itself is returned, not
    a copy.
    """
    if BenchmarkName in ("bt", "sp"):
        return processes_onlyBTSP
    return processes_excludeBTSP
# 引数は、ベンチマーク名、列名、固定する値、プロセスorクラスのどちらで固定するか
# 返値は次のような形式の辞書
# rowData:[プロセス数]or[ベンチマーククラス]
# <関数名>:[実行回数]
def returnDictForModelDataFrame(
    BenchmarkName="cg", rowData=["A", "B", "C", "D"], fix="64", fixed="Process"
):
    """Load one benchmark's call counts and return them as a dictionary.

    Args:
        BenchmarkName: benchmark whose CSV files are loaded.
        rowData: benchmark classes when ``fixed == "Process"``, process counts
            when ``fixed == "Class"``.
        fix: the value that is held fixed (a process count or a class).
        fixed: ``"Process"`` or ``"Class"``.

    Returns:
        The ``return_dict_Data`` dictionary of the concatenated tables.

    Raises:
        ValueError: if ``fixed`` is neither ``"Process"`` nor ``"Class"``
            (this used to surface as an UnboundLocalError).
    """
    if fixed == "Process":
        frames = return_fixed_process(
            BenchMark=BenchmarkName, BenchMarkClasses=rowData, FixedProcess=fix
        )
    elif fixed == "Class":
        frames = return_fixed_class(
            BenchMark=BenchmarkName, Processes=rowData, FixedBenchMarkClass=fix
        )
    else:
        raise ValueError(f'fixed must be "Process" or "Class", got {fixed!r}')
    return return_dict_Data(pd.concat(frames, axis=1))
def return_ModelDataSourceList(DictData, x_list, Index, test_ratio=0.3):
    """Build one model row per function listed in ``Index``.

    Args:
        DictData: mapping from function name to its list of observed values
            (the return value of returnDictForModelDataFrame()).
        x_list: explanatory values shared by all functions.
        Index: iterable of function names to process.
        test_ratio: fraction forwarded to ``return_Model_row_list``.

    Returns:
        A list with the result of ``return_Model_row_list(..., train=True)``
        for every function whose data passes ``does_include_nan`` as false.
        Per the original header comment each entry has the form
        ``[<function name>, <trained model 1>, ..., <trained model n>]``.
    """
    model_rows = []
    for function_name in Index:
        observed = DictData[function_name]
        if not does_include_nan(observed):
            model_rows.append(
                return_Model_row_list(
                    x=x_list,
                    y=observed,
                    function_name=function_name,
                    test_ratio=test_ratio,
                    train=True,
                )
            )
    return model_rows
def return_Models_per_Benchmark(
    BenchmarkName="cg", rowData=[1, 4, 16, 256], TestRate=0.3, fix="C", fixed="Class"
):
    """Return a DataFrame of trained models for every function of a benchmark.

    Args:
        BenchmarkName: benchmark name, e.g. "cg".
        rowData: the varying values (process counts for ``fixed == "Class"``,
            class letters for ``fixed == "Process"``).
        TestRate: fraction of the data assigned to testing; also part of the
            MAPE-table file name that is read.
        fix: the process count or class that is held constant.
        fixed: "Class" or "Process".

    Returns:
        DataFrame indexed by "FunctionName" with the columns "ModelLin",
        "ModelLog", "ModelIp" and "ModelBranch"; each cell holds what
        ``return_Model_row_list`` produced for that model kind.

    Raises:
        FileNotFoundError: if the MAPE table
            ``./tmp_GenerateResources/{BenchmarkName}_Fixed{fixed}Train_{TestRate}.csv``
            does not exist.
    """
    # The MAPE table supplies the set of function names to build models for.
    # The file name is derived from the TestRate argument (the original code
    # read an unrelated module-level `test_ratio` here).
    MapeTable = pd.read_csv(
        f"./tmp_GenerateResources/{BenchmarkName}_Fixed{fixed}Train_{TestRate}.csv"
    ).set_index("function name")
    function_names = MapeTable.index.to_numpy()

    # With a fixed process count the x values are class letters, which must
    # be converted to numbers before they can be used as model input.
    if fixed == "Process":
        checked_rowData = ConvertBenchmarkClasses(rowData)
    else:
        checked_rowData = rowData

    DictData = returnDictForModelDataFrame(
        BenchmarkName, rowData=rowData, fix=fix, fixed=fixed
    )
    model_rows = return_ModelDataSourceList(
        DictData=DictData,
        x_list=checked_rowData,
        Index=function_names,
        test_ratio=TestRate,
    )

    ModelDataFrame = pd.DataFrame(model_rows)
    ModelDataFrame.columns = [
        "FunctionName",
        "ModelLin",
        "ModelLog",
        "ModelIp",
        "ModelBranch",
    ]
    return ModelDataFrame.set_index("FunctionName")
def return_BestModelsDict(MapeTable, ModelDataFrame):
    """Pick, for each function, the model whose MAPE is the smallest.

    Args:
        MapeTable: per-benchmark MAPE table indexed by function name; its
            columns are read positionally, in the same order as the columns
            of ``ModelDataFrame``.
        ModelDataFrame: trained models indexed by function name, one column
            per model kind.

    Returns:
        dict mapping each function name in ``ModelDataFrame`` to the cell of
        ``ModelDataFrame`` whose column position matches the first minimum of
        that function's MAPE row.
    """
    model_columns = ModelDataFrame.columns.to_list()
    best_models = {}
    for function_name in ModelDataFrame.index.to_numpy():
        mape_row = MapeTable.loc[function_name].to_list()
        winning_column = model_columns[mape_row.index(min(mape_row))]
        best_models[function_name] = ModelDataFrame.at[function_name, winning_column]
    return best_models
# Module-level list of benchmark class letters; these are the four letters
# that ConvertBencharkClass_inNPB() can convert to numbers.
BenchmarkClasses = ["A", "B", "C", "D"]
def generateBestModelDict(
    TestRatio=0.3,
    BenchmarkName="ft",
    fixed="Class",
    fix="C",
    rowData=[1, 2, 4, 8, 16, 32, 64, 128, 256],
):
    """Return the best trained model for every function of one benchmark.

    Args:
        TestRatio: test fraction; selects the MAPE table file and is passed
            on to the model builder.
        BenchmarkName: benchmark name, e.g. "ft".
        fixed: "Class" reads the fixed-class MAPE table; any other value
            reads the fixed-process one.
        fix: the class or process count that is held constant.
        rowData: the varying values (process counts or class letters).

    Returns:
        dict mapping function name to its best trained model, as chosen by
        ``return_BestModelsDict``.

    Raises:
        FileNotFoundError: if the MAPE table CSV does not exist.  Errors from
        building the models are not caught here and propagate to the caller.
    """
    if fixed == "Class":
        file_name = f"{BenchmarkName}_FixedClassTrain_{TestRatio}.csv"
    else:
        file_name = f"{BenchmarkName}_FixedProcessTrain_{TestRatio}.csv"
    MapeTable = pd.read_csv(f"./tmp_GenerateResources/{file_name}").set_index(
        "function name"
    )

    # Use this function's own arguments; the original passed the unrelated
    # module-level names `benchmark` and `test_ratio` here.
    ModelDataFrame = return_Models_per_Benchmark(
        BenchmarkName=BenchmarkName,
        rowData=rowData,
        TestRate=TestRatio,
        fix=fix,
        fixed=fixed,
    )
    return return_BestModelsDict(MapeTable=MapeTable, ModelDataFrame=ModelDataFrame)
def return_StringTrainList(TestRatio=[0.3, 0.5], rowData=[1, 2, 4, 8]):
    """Return the training part of ``rowData`` as a string, once per test ratio.

    For every ratio in ``TestRatio`` the data is split with
    ``split_by_ratio`` and the training part is formatted with an f-string
    (e.g. ``"[1, 2, 4]"``).
    """
    train_strings = []
    for ratio in TestRatio:
        train_part, _test_part = split_by_ratio(base_list=rowData, test_ratio=ratio)
        train_strings.append(f"{train_part}")
    return train_strings
def return_ErrorRate(
    FunctionName="CG",
    PredictNum=256,
    BenchmarkName="cg",
    BenchmarkClass="D",
    Process=256,
):
    """Return the absolute percentage error of a predicted call count.

    The measured value is read from the "#Call" column of
    ``./csv_files/pprof_{BenchmarkName}{BenchmarkClass}{Process}.csv``
    (indexed by "Name").

    Args:
        FunctionName: row label to look up.
        PredictNum: predicted number of calls.
        BenchmarkName, BenchmarkClass, Process: identify the profile CSV.

    Returns:
        ``abs(real - predicted) / real * 100``, or ``False`` when the function
        is not present in the profile or its measured call count is 0
        (callers multiply the result, so ``False`` behaves like 0 there).

    Raises:
        FileNotFoundError: if the profile CSV does not exist.
    """
    target_csv = pd.read_csv(
        f"./csv_files/pprof_{BenchmarkName}{BenchmarkClass}{Process}.csv"
    ).set_index("Name")
    try:
        RealNum = target_csv.loc[FunctionName, "#Call"]
    except KeyError:
        # The original message referenced an undefined name and crashed here.
        print(f"該当する関数はありませんでした@{BenchmarkName}@{FunctionName}")
        return False
    if RealNum == 0:
        # A zero measurement cannot be used as a denominator.
        return False
    return abs(RealNum - PredictNum) / RealNum * 100
def return_Predicted(LearnedModel, num):
    """Return the model's prediction for ``num``.

    ``num`` (a process count or a numeric benchmark class) is reshaped to a
    column of shape (-1, 1), passed to ``LearnedModel.predict`` and the
    element ``[0][0]`` of the result is returned.
    """
    column_input = np.reshape(num, (-1, 1))
    return LearnedModel.predict(column_input).tolist()[0][0]
def return_FixedClassModelDF(benchmark="cg", FixClass="C"):
    """Return the best trained model per function with the benchmark class fixed.

    For every function in the profiles of ``benchmark`` at class ``FixClass``
    and for every test ratio, the training part of the data is used to build
    the candidate models, and the one with the smallest
    ``mape_score_InTrain`` is kept.  Functions whose data contains NaN (as
    judged by ``does_include_nan``) are left out.

    Returns:
        DataFrame indexed by function name with one column per test ratio;
        each column label is the string form of the process counts used for
        training.
    """
    if benchmark == "bt" or benchmark == "sp":
        process_counts = [1, 4, 16, 64, 256]
        TestRates = [0, 0.2, 0.4, 0.6, 0.8]
    else:
        process_counts = [1, 2, 4, 8, 16, 32, 64, 128, 256]
        TestRates = [0, 0.2, 0.4, 0.6, 0.8, 0.9]

    raw_frames = return_fixed_class(
        BenchMark=benchmark, Processes=process_counts, FixedBenchMarkClass=FixClass
    )
    RawDataFrame = pd.concat(raw_frames, axis=1)
    x_list = RawDataFrame.columns.tolist()

    ModelsInBenchmark = {}
    for FunctionName in RawDataFrame.index.tolist():
        # 0 marks "no model chosen yet" for a test ratio.
        best_per_ratio = [0] * len(TestRates)
        y_list = RawDataFrame.loc[FunctionName].tolist()
        for slot, ratio in enumerate(TestRates):
            x_train = split_by_ratio(x_list, ratio)[0]
            y_train = split_by_ratio(y_list, ratio)[0]
            if does_include_nan(y_list):
                continue
            # Drop the leading function-name entry; the rest are the models.
            candidates = return_Model_row_list(
                x=x_train,
                y=y_train,
                function_name=FunctionName,
                test_ratio=0,
                train=True,
            )[1:]
            train_mapes = [candidate.mape_score_InTrain for candidate in candidates]
            best_per_ratio[slot] = candidates[train_mapes.index(min(train_mapes))]
        # Skip functions for which at least one ratio produced no model.
        if 0 in best_per_ratio:
            continue
        ModelsInBenchmark[FunctionName] = best_per_ratio

    ModelDF = pd.DataFrame.from_dict(ModelsInBenchmark, orient="index")
    ModelDF.columns = [f"{split_by_ratio(x_list, ratio)[0]}" for ratio in TestRates]
    return ModelDF
def return_FixedProcessModelDF(benchmark="cg", FixProcess="64"):
    """Return the best trained model per function with the process count fixed.

    For every function in the profiles of ``benchmark`` run with
    ``FixProcess`` processes and for every test ratio, the training part of
    the data is used to build the candidate models, and the one with the
    smallest ``mape_score_InTrain`` is kept.  Functions whose data contains
    NaN (as judged by ``does_include_nan``) are left out.

    Returns:
        DataFrame indexed by function name with one column per test ratio;
        each column label is the string form of the class letters used for
        training.
    """
    BenchmarkClasses = ["A", "B", "C", "D"]
    TestRates = [0, 0.4, 0.5, 0.8]
    # Kept from the original; the value is not used below.
    BenchmarkClassesOnNum = ConvertBenchmarkClasses(BenchmarkClasses)

    raw_frames = return_fixed_process(
        BenchMark=benchmark, BenchMarkClasses=BenchmarkClasses, FixedProcess=FixProcess
    )
    RawDataFrame = pd.concat(raw_frames, axis=1)
    # Column labels are class letters; models need their numeric form.
    x_list_base = RawDataFrame.columns.tolist()
    x_list = ConvertBenchmarkClasses(copy.deepcopy(x_list_base))

    ModelsInBenchmark = {}
    for FunctionName in RawDataFrame.index.tolist():
        # 0 marks "no model chosen yet" for a test ratio.
        best_per_ratio = [0] * len(TestRates)
        y_list = RawDataFrame.loc[FunctionName].tolist()
        for slot, ratio in enumerate(TestRates):
            x_train = split_by_ratio(x_list, ratio)[0]
            y_train = split_by_ratio(y_list, ratio)[0]
            if does_include_nan(y_list):
                continue
            # Drop the leading function-name entry; the rest are the models.
            candidates = return_Model_row_list(
                x=x_train,
                y=y_train,
                function_name=FunctionName,
                test_ratio=0,
                train=True,
            )[1:]
            train_mapes = [candidate.mape_score_InTrain for candidate in candidates]
            best_per_ratio[slot] = candidates[train_mapes.index(min(train_mapes))]
        # Skip functions for which at least one ratio produced no model.
        if 0 in best_per_ratio:
            continue
        ModelsInBenchmark[FunctionName] = best_per_ratio

    ModelDF = pd.DataFrame.from_dict(ModelsInBenchmark, orient="index")
    ModelDF.columns = [
        f"{split_by_ratio(x_list_base, ratio)[0]}" for ratio in TestRates
    ]
    return ModelDF
# Module-level call: builds the fixed-process model table for "cg" (with the
# default FixProcess) every time this file is executed.
ModelDF = return_FixedProcessModelDF(benchmark="cg")
def return_MapeTableFixedClass(benchmark="cg"):
    """Return the training-MAPE table of ``benchmark`` with the class fixed.

    NOTE: a second definition of this name (taking an extra ``FixClass``
    argument) appears further down in this file and replaces this one once
    the whole module has been executed.

    Returns:
        DataFrame with one row per function plus a final "Average" row
        holding the column means, or ``-1`` if the model table could not be
        built.
    """
    try:
        model_table = return_FixedClassModelDF(benchmark=benchmark)
    except Exception:
        print(f"return_FixedClassModelDF({benchmark})の実行に失敗しました")
        return -1
    # Replace every trained model by its MAPE score on the training data.
    for column in model_table.columns.tolist():
        for row in model_table.index.tolist():
            model_table.at[row, column] = model_table.at[row, column].mape_score_InTrain
    average = model_table.mean()
    average.name = "Average"
    # The original discarded the result of DataFrame.append (which is also
    # gone in pandas 2.0), so the "Average" row was never returned.
    return pd.concat([model_table, average.to_frame().T])
def return_MapeTableFixedProcess(benchmark="cg"):
    """Return the training-MAPE table of ``benchmark`` with the process count fixed.

    NOTE: a second definition of this name (taking an extra ``FixProcess``
    argument) appears further down in this file and replaces this one once
    the whole module has been executed.

    Returns:
        DataFrame with one row per function plus a final "Average" row
        holding the column means, or ``-1`` if the model table could not be
        built.
    """
    try:
        model_table = return_FixedProcessModelDF(benchmark=benchmark)
    except Exception:
        print(f"return_FixedProcessModelDF({benchmark})の実行に失敗しました")
        return -1
    # Replace every trained model by its MAPE score on the training data.
    for column in model_table.columns.tolist():
        for row in model_table.index.tolist():
            model_table.at[row, column] = model_table.at[row, column].mape_score_InTrain
    average = model_table.mean()
    average.name = "Average"
    # The original discarded the result of DataFrame.append (which is also
    # gone in pandas 2.0), so the "Average" row was never returned.
    return pd.concat([model_table, average.to_frame().T])
def return_MapeTableFixedClass(benchmark="cg", FixClass="C"):
    """Return the training-MAPE table of ``benchmark`` with class ``FixClass`` fixed.

    Args:
        benchmark: benchmark name, e.g. "cg".
        FixClass: benchmark class that is held constant.

    Returns:
        DataFrame with one row per function plus a final "Average" row
        holding the column means, or ``-1`` if the model table could not be
        built.
    """
    try:
        model_table = return_FixedClassModelDF(benchmark=benchmark, FixClass=FixClass)
    except Exception:
        print(f"return_FixedClassModelDF({benchmark})の実行に失敗しました")
        return -1
    # Replace every trained model by its MAPE score on the training data.
    for column in model_table.columns.tolist():
        for row in model_table.index.tolist():
            model_table.at[row, column] = model_table.at[row, column].mape_score_InTrain
    average = model_table.mean()
    average.name = "Average"
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.0.
    return pd.concat([model_table, average.to_frame().T])
def return_MapeTableFixedProcess(benchmark="cg", FixProcess="64"):
    """Return the training-MAPE table of ``benchmark`` with ``FixProcess`` processes.

    Args:
        benchmark: benchmark name, e.g. "cg".
        FixProcess: process count that is held constant.

    Returns:
        DataFrame with one row per function plus a final "Average" row
        holding the column means, or ``-1`` if the model table could not be
        built.
    """
    try:
        model_table = return_FixedProcessModelDF(
            benchmark=benchmark, FixProcess=FixProcess
        )
    except Exception:
        print(f"return_FixedProcessModelDF({benchmark})の実行に失敗しました")
        return -1
    # Replace every trained model by its MAPE score on the training data.
    for column in model_table.columns.tolist():
        for row in model_table.index.tolist():
            model_table.at[row, column] = model_table.at[row, column].mape_score_InTrain
    average = model_table.mean()
    average.name = "Average"
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.0.
    return pd.concat([model_table, average.to_frame().T])
def SaveMapeTables(FixClass="B", FixProcess=256, DirPath="./tmp_GenerateResources/"):
    """Save the fixed-class and fixed-process MAPE tables of every benchmark as CSV.

    Iterates over the module-level ``benchmarks`` list.

    Args:
        FixClass: benchmark class held constant for the fixed-class table.
        FixProcess: process count held constant for the fixed-process table.
        DirPath: output directory prefix (concatenated with the file name
            as-is, so it should end with a path separator).

    Files are named ``MapeTableFixedClass_{benchmark}{FixClass}.csv`` and
    ``MapeTableFixedProcess_{benchmark}{FixProcess}.csv``.  A table is skipped
    when its builder did not return a DataFrame (the builders return -1 on
    failure).
    """
    for benchmark in benchmarks:
        # Pass the requested values through; the original always computed
        # the tables for "B" / 256 while naming the files after the arguments.
        MapeTableFixedClass = return_MapeTableFixedClass(benchmark, FixClass=FixClass)
        MapeTableFixedProcess = return_MapeTableFixedProcess(
            benchmark, FixProcess=FixProcess
        )
        if isinstance(MapeTableFixedClass, pd.DataFrame):
            MapeTableFixedClass.to_csv(
                f"{DirPath}MapeTableFixedClass_{benchmark}{FixClass}.csv"
            )
        if isinstance(MapeTableFixedProcess, pd.DataFrame):
            MapeTableFixedProcess.to_csv(
                f"{DirPath}MapeTableFixedProcess_{benchmark}{FixProcess}.csv"
            )
# SaveMapeTables(FixClass="C", FixProcess=256)
class TimeData:
    """Record of one benchmark run, created for return_TimeDataList().

    Holds the benchmark name, the process count, the benchmark class and the
    execution time; return_AllData() hands all of them back as a dictionary.
    """

    def __init__(self, benchmark="cg", process="32", BenchmarkClass="C", time=-1):
        self.benchmark, self.process = benchmark, process
        self.BenchmarkClass, self.time = BenchmarkClass, time

    def return_AllData(self):
        """Return every stored value in a dictionary keyed by attribute name."""
        return dict(
            benchmark=self.benchmark,
            process=self.process,
            BenchmarkClass=self.BenchmarkClass,
            time=self.time,
        )
def return_TimeDataList(process=256):
    """Parse ``./toGetProfile/toGetTime/TimeWith{process}.txt`` into TimeData objects.

    The file is read in groups of three lines:
      1. characters 1-2 of the line, lower-cased, give the benchmark name;
      2. the second-to-last character of the line gives the benchmark class;
      3. the last 25 characters of the line, stripped, give the time
         (kept as a string).

    Returns:
        list of TimeData, one per complete three-line group, each with
        ``process`` set to the argument.
    """
    records = []
    with open(f"./toGetProfile/toGetTime/TimeWith{process}.txt") as f:
        for line_number, line in enumerate(f, start=1):
            position = line_number % 3
            if position == 1:
                Data = TimeData(benchmark=line[1:3].lower(), process=process)
            elif position == 2:
                Data.BenchmarkClass = line[-2]
            else:
                Data.time = line[-25:].strip()
                records.append(Data)
    return records
def FillCSV(benchmark="cg", process=256, BenchmarkClass="A", time="0.04"):
    """Store one execution time in ``./csv_files/ExecTime@{benchmark}.csv``.

    The CSV is indexed by benchmark class with one column per process count.

    Args:
        benchmark: benchmark name; selects the CSV file.
        process: process count; selects the column.
        BenchmarkClass: benchmark class; selects the row.
        time: execution time, converted with ``float()``.

    Raises:
        FileNotFoundError: if the CSV does not exist.
        ValueError: if ``time`` cannot be converted to float.
    """
    time = float(time)
    CSVFilename = f"./csv_files/ExecTime@{benchmark}.csv"
    DataFrame = pd.read_csv(CSVFilename, index_col=0)
    # Column labels read from a CSV header are strings, so address the column
    # by its string form; an int label would add a second, duplicate column.
    DataFrame.at[BenchmarkClass, str(process)] = time
    DataFrame.to_csv(CSVFilename)
def GenerateErrorRateTable(
    benchmark="cg", predict_class="D", predict_process=256, FixProcess=64, FixClass="B"
):
    """Write the fixed-process and fixed-class error-rate tables as LaTeX.

    Args:
        benchmark: benchmark name.
        predict_class: class to predict for the fixed-process table.
        predict_process: process count to predict for the fixed-class table.
        FixProcess: process count held constant.
        FixClass: benchmark class held constant.

    The tables are written to
    ``./table_LatexForm/ErrorRateTable@{benchmark}Fixed{FixProcess}.table`` and
    ``./table_LatexForm/ErrorRateTable@{benchmark}Fixed{FixClass}.table``.
    If building one of the tables fails, a message is printed and that file
    is skipped; the other table is still attempted.  Returns nothing.
    """
    DirName = "./table_LatexForm/"
    FileSuffix = ".table"
    FilePrefix = f"ErrorRateTable@{benchmark}Fixed"

    try:
        ErrorRateFixedProcessDF = return_ErrorRateFixedProcessDF(
            benchmark=benchmark, FixProcess=FixProcess, predict_class=predict_class
        )
    except Exception:
        print("実行プロセスを固定した際の誤差率の表を取得するのに失敗したので、CSVとして保存できませんでした。")
    else:
        # Only write when the table exists; the original wrote unconditionally
        # and crashed on the unbound name after a failure.
        ErrorRateFixedProcessDF.to_latex(
            f"{DirName}{FilePrefix}{FixProcess}{FileSuffix}"
        )

    try:
        ErrorRateFixedClassDF = return_ErrorRateFixedClassDF(
            benchmark=benchmark, FixClass=FixClass, predict_process=predict_process
        )
    except Exception:
        print("ベンチマーククラスを固定した際の誤差率の表を取得するのに失敗したので、CSVとして保存できませんでした。")
    else:
        ErrorRateFixedClassDF.to_latex(f"{DirName}{FilePrefix}{FixClass}{FileSuffix}")
def return_ErrorRateFixedProcessDF(benchmark="cg", FixProcess=64, predict_class="D"):
    """Return the error-rate table for predicting ``predict_class`` at ``FixProcess``.

    Every trained model from ``return_FixedProcessModelDF`` is asked to
    predict the call count for ``predict_class``; the prediction is compared
    with the measured profile via ``return_ErrorRate``.

    Args:
        benchmark: benchmark name.
        FixProcess: process count held constant, used both to build the
            models and to select the measured profile.
        predict_class: benchmark class whose call counts are predicted.

    Returns:
        DataFrame with the same index/columns as the model table, each cell
        replaced by the error rate in percent, truncated to three decimals.
    """
    # Build the models from the same process count the error is measured
    # against (the original hard-coded FixProcess=256 here).
    FixProcessDF = return_FixedProcessModelDF(benchmark=benchmark, FixProcess=FixProcess)
    predict_class_number = ConvertBencharkClass_inNPB(predict_class)
    for index in FixProcessDF.index.tolist():
        for column in FixProcessDF.columns.tolist():
            PredictNum = return_Predicted(
                FixProcessDF.at[index, column], predict_class_number
            )
            error_rate = return_ErrorRate(
                FunctionName=index,
                PredictNum=PredictNum,
                BenchmarkName=benchmark,
                BenchmarkClass=predict_class,
                Process=FixProcess,
            )
            # Truncate (not round) to three decimal places.
            FixProcessDF.at[index, column] = int(error_rate * 1000) / 1000
    return FixProcessDF
def return_ErrorRateFixedClassDF(benchmark="cg", FixClass="B", predict_process=256):
    """Return the error-rate table for predicting ``predict_process`` at ``FixClass``.

    Every trained model from ``return_FixedClassModelDF`` predicts the call
    count for ``predict_process`` processes; the prediction is compared with
    the measured profile via ``return_ErrorRate``.

    Returns:
        The model table with each cell replaced by the error rate in percent,
        truncated to three decimals.
    """
    FixClassDF = return_FixedClassModelDF(benchmark, FixClass=FixClass)
    for function_name in FixClassDF.index.tolist():
        for train_label in FixClassDF.columns.tolist():
            predicted = return_Predicted(
                FixClassDF.at[function_name, train_label], predict_process
            )
            error_rate = return_ErrorRate(
                FunctionName=function_name,
                PredictNum=predicted,
                BenchmarkName=benchmark,
                BenchmarkClass=FixClass,
                Process=predict_process,
            )
            # Truncate (not round) to three decimal places.
            FixClassDF.at[function_name, train_label] = int(error_rate * 1000) / 1000
    return FixClassDF
# for benchmark in benchmarks:
# GenerateErrorRateTable(benchmark=benchmark)
# Process counts selected for the "bt" and "sp" benchmarks (see
# return_OptimalProcessesList and reutrn_MapeTableRowDataframe_FixedClass).
processes_onlyBTSP = [1, 4, 16, 64, 256]
# Process counts selected for every other benchmark.
processes_excludeBTSP = [1, 2, 4, 8, 16, 32, 64, 128, 256]
def reutrn_MapeTableRowDataframe_FixedClass(
    benchmark="cg", FixedClass="B", test_ratio=0.3, enableTrain=True
):
    """Return the one-row MAPE summary of ``benchmark`` with the class fixed.

    Args:
        benchmark: benchmark name.
        FixedClass: benchmark class that is held constant.
        test_ratio: fraction of the data used for testing.
        enableTrain: forwarded as ``train`` to return_MapeTable_per_benchmark.

    Returns:
        DataFrame indexed by "ベンチマーク名" with one column per model kind.
        Per the original header comment each cell is a custom object holding
        the adoption ratio, maximum and minimum for that model.
    """
    process = processes_onlyBTSP if benchmark in ("bt", "sp") else processes_excludeBTSP
    frames = return_fixed_class(
        BenchMark=benchmark, Processes=process, FixedBenchMarkClass=FixedClass
    )
    DictData = return_dict_Data(pd.concat(frames, axis=1))
    per_benchmark = return_MapeTable_per_benchmark(
        DictData, test_ratio=test_ratio, train=enableTrain
    )
    # The row comes back as a sequence; transpose so it becomes a single row.
    MapeTable = pd.DataFrame(return_MapeTable_row(per_benchmark, benchmark)).T
    MapeTable.columns = ["線形モデル", "対数モデル", "反比例モデル", "分岐モデル", "ベンチマーク名"]
    return MapeTable.set_index("ベンチマーク名")
def return_MapeTableDataframe_FixedClass(FixedClass="B", test_ratio=0.3):
    """Return the fixed-class MAPE summary for all benchmarks.

    Builds one row per entry of the module-level ``benchmarks`` list with
    reutrn_MapeTableRowDataframe_FixedClass (training enabled) and stacks
    the rows into a single DataFrame.
    """
    rows = [
        reutrn_MapeTableRowDataframe_FixedClass(
            benchmark=benchmark,
            FixedClass=FixedClass,
            test_ratio=test_ratio,
            enableTrain=True,
        )
        for benchmark in benchmarks
    ]
    return pd.concat(rows)
def return_MapeTableOnlyStrDataframe(input_MapeTableDF):
    """Return a copy of a MAPE table whose cells are strings.

    Args:
        input_MapeTableDF: the return value of
            return_MapeTableDataframe_Fixed<Class or Process>(); every cell
            must offer a ``return_Data()`` method.

    Returns:
        DataFrame with the same index and columns where each cell holds the
        result of that cell's ``return_Data()``.
    """
    return_DF = pd.DataFrame().reindex_like(input_MapeTableDF).astype("str")
    for row_label in input_MapeTableDF.index:
        for column_label in input_MapeTableDF.columns:
            cell = input_MapeTableDF.at[row_label, column_label]
            return_DF.at[row_label, column_label] = cell.return_Data()
    return return_DF
# 利用例
# input_MapeTableDF = return_MapeTableDataframe_FixedClass()
# print(return_MapeTableOnlyStrDataframe(input_MapeTableDF=input_MapeTableDF))
def reutrn_MapeTableRowDataframe_FixedProcess(
    benchmark="cg", FixedProcess=256, test_ratio=0.3, enableTrain=True
):
    """Return the one-row MAPE summary of ``benchmark`` with the process count fixed.

    Args:
        benchmark: benchmark name.
        FixedProcess: process count that is held constant.
        test_ratio: fraction of the data used for testing.
        enableTrain: forwarded as ``train`` to return_MapeTable_per_benchmark.

    Returns:
        DataFrame indexed by "ベンチマーク名" with one column per model kind.
        Per the original header comment each cell is a custom object holding
        the adoption ratio, maximum and minimum for that model.
    """
    # Skip the first two entries of the module-level `classes` list (S and W):
    # ConvertBenchmarkClasses cannot turn them into numbers.
    classes_inFunc = classes[2:]
    classes_onNum = ConvertBenchmarkClasses(classes_inFunc)
    frames = return_fixed_process(
        BenchMark=benchmark, BenchMarkClasses=classes_inFunc, FixedProcess=FixedProcess
    )
    DictData = return_dict_Data(pd.concat(frames, axis=1))
    # Use the numeric class values as the explanatory variable.
    DictData["rowData"] = classes_onNum
    per_benchmark = return_MapeTable_per_benchmark(
        DictData, test_ratio=test_ratio, train=enableTrain
    )
    # The row comes back as a sequence; transpose so it becomes a single row.
    MapeTable = pd.DataFrame(return_MapeTable_row(per_benchmark, benchmark)).T
    MapeTable.columns = ["線形モデル", "対数モデル", "反比例モデル", "分岐モデル", "ベンチマーク名"]
    return MapeTable.set_index("ベンチマーク名")
def return_MapeTableDataframe_FixedProcess(FixedProcess=256, test_ratio=0.3):
    """Return the fixed-process MAPE summary for all benchmarks.

    Builds one row per entry of the module-level ``benchmarks`` list with
    reutrn_MapeTableRowDataframe_FixedProcess (training enabled) and stacks
    the rows into a single DataFrame.
    """
    rows = [
        reutrn_MapeTableRowDataframe_FixedProcess(
            benchmark=benchmark,
            FixedProcess=FixedProcess,
            test_ratio=test_ratio,
            enableTrain=True,
        )
        for benchmark in benchmarks
    ]
    return pd.concat(rows)
# 利用例
# input_MapeTableDF = return_MapeTableDataframe_FixedProcess()
# print(return_MapeTableOnlyStrDataframe(input_MapeTableDF=input_MapeTableDF))
def return_ErrorRateFixedProcessDFwithAverate(
    benchmark="cg", FixProcess=64, predict_class="D"
):
    """Return the fixed-process error-rate table with an "Average" row appended.

    Args:
        benchmark: benchmark name.
        FixProcess: process count held constant.
        predict_class: benchmark class to predict.

    Returns:
        The table from return_ErrorRateFixedProcessDF followed by one extra
        row labelled "Average" that holds the column means.
    """
    ErrorRateFixedProcessDF = return_ErrorRateFixedProcessDF(
        benchmark=benchmark, FixProcess=FixProcess, predict_class=predict_class
    )
    average = ErrorRateFixedProcessDF.mean()
    average.name = "Average"
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.0.
    return pd.concat([ErrorRateFixedProcessDF, average.to_frame().T])
def return_ErrorRateFixedClassDFwithAverage(
    benchmark="cg", FixClass="B", predict_process=256
):
    """Return the fixed-class error-rate table with an "Average" row appended.

    Args:
        benchmark: benchmark name.
        FixClass: benchmark class held constant.
        predict_process: process count to predict.

    Returns:
        The table from return_ErrorRateFixedClassDF followed by one extra row
        labelled "Average" that holds the column means.
    """
    ErrorRateFixedClassDF = return_ErrorRateFixedClassDF(
        benchmark=benchmark, FixClass=FixClass, predict_process=predict_process
    )
    average = ErrorRateFixedClassDF.mean()
    average.name = "Average"
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.0.
    return pd.concat([ErrorRateFixedClassDF, average.to_frame().T])
def convert_StrListToIntList(InputList: str):
    """Parse the printed form of a list of integers back into a list.

    Args:
        InputList: text such as ``"[1, 2, 4]"`` (the first and last
            characters are treated as the brackets and dropped).

    Returns:
        list of int; ``"[]"`` gives an empty list.

    Raises:
        ValueError: if an element is not an integer literal.
    """
    inner = InputList[1:-1].strip()
    if not inner:
        # int("") would raise, so an empty list needs its own branch.
        return []
    return [int(item.strip()) for item in inner.split(",")]
def convert_StrListToStrList(InputList: str):
    """Parse the printed form of a list of short strings back into a list.

    Args:
        InputList: text such as ``"['A', 'B']"`` (the first and last
            characters are treated as the brackets and dropped; single quotes
            are removed).

    Returns:
        list of str; ``"[]"`` gives an empty list rather than ``[""]``, so
        that ``len()`` of the result is the true element count.
    """
    inner = InputList[1:-1].replace("'", "").strip()
    if not inner:
        return []
    return [item.strip() for item in inner.split(",")]
def return_ExecTime(benchmark="cg", BenchmarkClass="B", Process=256):
    """Look up one execution time in ``./csv_files/ExecTime@{benchmark}.csv``.

    Args:
        benchmark: benchmark name; selects the CSV file.
        BenchmarkClass: row label.
        Process: process count; its string form is the column label.

    Returns:
        The stored value, or ``-1`` when the cell is NaN.
    """
    exec_times = pd.read_csv(f"./csv_files/ExecTime@{benchmark}.csv", index_col=0)
    value = exec_times.at[BenchmarkClass, f"{Process}"]
    return -1 if np.isnan(value) else value
def return_FixClassCost(benchmark="cg", ProcessList=[1, 2, 4], BenchmarkClass="B"):
    """Return the total cost of running one class at several process counts.

    The cost is the sum of ``execution time * process count`` over
    ``ProcessList``.  As soon as one execution time is negative (which is
    how return_ExecTime reports a missing value) the result is ``-1``.
    """
    total = 0
    for process_count in ProcessList:
        elapsed = return_ExecTime(
            benchmark=benchmark, BenchmarkClass=BenchmarkClass, Process=process_count
        )
        if elapsed < 0:
            return -1
        total = total + elapsed * process_count
    return total
def return_FixProcessCost(
    benchmark="cg", BenchmarkClassList=["A", "B", "C"], Process=256
):
    """Return the total cost of running several classes at one process count.

    The cost is the sum of ``execution time * Process`` over
    ``BenchmarkClassList``.  As soon as one execution time is negative (which
    is how return_ExecTime reports a missing value) the result is ``-1``.
    """
    total = 0
    for class_letter in BenchmarkClassList:
        elapsed = return_ExecTime(
            benchmark=benchmark, BenchmarkClass=class_letter, Process=Process
        )
        if elapsed < 0:
            return -1
        total = total + elapsed * Process
    return total
def return_ErrorRateFixedClassAverageAndCosts(
    benchmark="cg", FixClass="B", predict_process=256
):
    """Return average error rate and profiling cost per training set (class fixed).

    Returns:
        DataFrame indexed by the string form of each training process list,
        with the column "Average" (mean error rate) and the column
        "PredictCosts" (result of return_FixClassCost for that list).
    """
    with_average = return_ErrorRateFixedClassDFwithAverage(
        benchmark=benchmark, FixClass=FixClass, predict_process=predict_process
    )
    average_frame = pd.DataFrame(with_average.loc["Average"])
    # Row labels look like "[1, 2, 4]"; turn each back into a list of ints.
    process_lists = [
        convert_StrListToIntList(label) for label in average_frame.index.tolist()
    ]
    average_frame["PredictCosts"] = [
        return_FixClassCost(
            benchmark=benchmark, ProcessList=process_list, BenchmarkClass=FixClass
        )
        for process_list in process_lists
    ]
    return average_frame
def return_ErrorRateFixedProcessAverageAndCosts(
    benchmark="cg", FixProcess=256, predict_class="D"
):
    """Return average error rate and profiling cost per training set (process fixed).

    Returns:
        DataFrame indexed by the string form of each training class list,
        with the column "Average" (mean error rate) and the column
        "PredictCosts" (result of return_FixProcessCost for that list).
    """
    with_average = return_ErrorRateFixedProcessDFwithAverate(
        benchmark=benchmark, FixProcess=FixProcess, predict_class=predict_class
    )
    average_frame = pd.DataFrame(with_average.loc["Average"])
    # Row labels look like "['A', 'B']"; turn each back into a list of letters.
    class_lists = [
        convert_StrListToStrList(label) for label in average_frame.index.tolist()
    ]
    average_frame["PredictCosts"] = [
        return_FixProcessCost(
            benchmark=benchmark, BenchmarkClassList=class_list, Process=FixProcess
        )
        for class_list in class_lists
    ]
    return average_frame
# benchmark="cg"
# FixProcess=64
# predict_class="D"
# FixClass="B"
# predict_process=256
# BenchmarkClass="B"
# Process=64
def return_ErrorRateFixedClass_AveragePredictCostRealCost(
    benchmark="cg", FixClass="B", predict_process=256
):
    """Add the cost of the real run to the fixed-class average/cost table.

    The "RealCost" column holds ``predict_process * execution time`` of the
    run at class ``FixClass`` with ``predict_process`` processes, repeated on
    every row.
    """
    table = return_ErrorRateFixedClassAverageAndCosts(
        benchmark=benchmark, FixClass=FixClass, predict_process=predict_process
    )
    row_count = len(table.index.tolist())
    measured_time = return_ExecTime(
        benchmark=benchmark, BenchmarkClass=FixClass, Process=predict_process
    )
    table["RealCost"] = [predict_process * measured_time] * row_count
    return table
def return_ErrorRateFixedProcess_AveragePredictCostRealCost(
    benchmark="cg", FixProcess=256, predict_class="D"
):
    """Add the cost of the real run to the fixed-process average/cost table.

    The "RealCost" column holds ``FixProcess * execution time`` of the run at
    class ``predict_class`` with ``FixProcess`` processes, repeated on every
    row.
    """
    table = return_ErrorRateFixedProcessAverageAndCosts(
        benchmark=benchmark, FixProcess=FixProcess, predict_class=predict_class
    )
    row_count = len(table.index.tolist())
    measured_time = return_ExecTime(
        benchmark=benchmark, BenchmarkClass=predict_class, Process=FixProcess
    )
    table["RealCost"] = [FixProcess * measured_time] * row_count
    return table
def GenerateMapeTable(FixedClass="B", FixedProcess="64", test_ratio=0.3):
    """Write the fixed-process and fixed-class MAPE tables as LaTeX files.

    The files go to ``./table_LatexForm/`` and are named
    ``MapeTableFixed{FixedProcess}Train0{n}.table`` and
    ``MapeTableFixed{FixedClass}Train0{n}.table`` where
    ``n = int(test_ratio * 10)``.  The fixed-process table is written first.
    """
    output_dir = "./table_LatexForm/"
    name_prefix = "MapeTableFixed"
    name_suffix = f"Train0{int(test_ratio*10)}.table"

    process_table = return_MapeTableOnlyStrDataframe(
        input_MapeTableDF=return_MapeTableDataframe_FixedProcess(
            FixedProcess=FixedProcess, test_ratio=test_ratio
        )
    )
    process_table.to_latex(f"{output_dir}{name_prefix}{FixedProcess}{name_suffix}")

    class_table = return_MapeTableOnlyStrDataframe(
        input_MapeTableDF=return_MapeTableDataframe_FixedClass(
            FixedClass=FixedClass, test_ratio=test_ratio
        )
    )
    class_table.to_latex(f"{output_dir}{name_prefix}{FixedClass}{name_suffix}")
# MAPE表を生成する例
# GenerateMapeTable()
def ConvertIndexNameToNumOfProfile(inputDF, Fixed="Process"):
    """Replace each row label by the number of profiles it names.

    Args:
        inputDF: DataFrame whose row labels are printed lists of the
            profiles that were used (class letters for ``Fixed="Process"``,
            process counts for ``Fixed="Class"``).
        Fixed: "Process" or "Class"; selects how a label is parsed.  For any
            other value every label becomes 0.

    Returns:
        A deep copy of ``inputDF`` whose index is the element count of each
        original label; ``inputDF`` itself is left untouched.
    """

    def profile_count(label):
        if Fixed == "Process":
            return len(convert_StrListToStrList(label))
        if Fixed == "Class":
            return len(convert_StrListToIntList(label))
        return 0

    returnDF = inputDF.copy(deep=True)
    returnDF.index = [profile_count(label) for label in inputDF.index.tolist()]
    return returnDF
# Plot the "Average" error column against the number of profiles used,
# drawing one line per benchmark.
# Parameters (as used below):
#   benchmarks    - benchmark names to iterate over
#   Fixed         - "Process" selects the fixed-process table; any other
#                   value selects the fixed-class table
#   Fix / Predict - forwarded as FixProcess/predict_class or
#                   FixClass/predict_process respectively
#   EnableTitle   - when true, plt.title(GraphTitle) is called
#   EnableScatter - when true, plt.scatter(x, y) is called
#   SaveGraph     - when true, the figure is saved under
#                   ./tmp_GenerateResources/, otherwise plt.show() is called
# NOTE(review): indentation is not visible in this copy of the file, so where
# the for-loop body and the second `if Fixed == "Process":` body end could not
# be confirmed -- check against the original before restructuring.
def GenGraphAveragePerProfileNum(
benchmarks=["cg"],
Fixed="Process",
Fix=64,
Predict="D",
EnableTitle=False,
EnableScatter=False,
SaveGraph=False,
):
plt.figure(figsize=(8, 5))
for benchmark in benchmarks:
# Fetch the average-error / cost table for this benchmark.
if Fixed == "Process":
FixedDF = return_ErrorRateFixedProcess_AveragePredictCostRealCost(
benchmark=benchmark, FixProcess=Fix, predict_class=Predict
)
# The title embeds the whole `benchmarks` list, not the current benchmark.
GraphTitle = f"ベンチマーク{benchmarks}で実行プロセス数を固定"
else:
FixedDF = return_ErrorRateFixedClass_AveragePredictCostRealCost(
benchmark=benchmark, FixClass=Fix, predict_process=Predict
)
GraphTitle = f"ベンチマーク{benchmarks}でベンチマーククラスを固定"
# Turn the row labels into "number of profiles used" for the x axis.
ConvertedIndexFixedDF = ConvertIndexNameToNumOfProfile(FixedDF, Fixed=Fixed)
x = ConvertedIndexFixedDF.index.tolist()
y = ConvertedIndexFixedDF["Average"].tolist()
plt.plot(x, y, label=f"{benchmark.upper()}", marker="o")
if EnableScatter:
plt.scatter(x, y)
plt.legend()
plt.xlabel("使用したプロファイル数")
plt.ylabel("平均絶対誤差率 [%]")
# Force integer ticks on the x axis.
plt.gca().xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
if EnableTitle:
plt.title(GraphTitle)
if Fixed == "Process":
# Place the legend to the right of the axes (anchor x = 1.01) and adjust the subplot's right edge.
plt.legend(bbox_to_anchor=(1.01, 1), loc="upper left", borderaxespad=0)
plt.subplots_adjust(right=0.9)
plt.yscale("log")
if SaveGraph:
filename = f"Fix{Fixed}AverageError.pdf"
plt.savefig(f"./tmp_GenerateResources/{filename}")
else:
plt.show()
# 使用例
# GenGraphAveragePerProfileNum(benchmarks=benchmarks, Fixed="Process", Fix=64, Predict="D")
# GenGraphAveragePerProfileNum(benchmarks=benchmarks, Fixed="Class", Fix="B", Predict=256)
# for benchmark in benchmarks:
# print(return_ErrorRateFixedClass_AveragePredictCostRealCost(benchmark=benchmark, FixClass="B", predict_process=256))
def GenGraphTotalTimePerProfileNum(
    benchmark="cg",
    Fixed="Process",
    Fix=64,
    Predict="D",
    EnableTitle=False,
    SaveGraph=False,
):
    """Plot prediction cost versus real-run cost against the number of profiles.

    Args:
        benchmark: benchmark name.
        Fixed: "Process" uses the fixed-process table; any other value uses
            the fixed-class table.
        Fix: the process count or class held constant.
        Predict: the class or process count being predicted.
        EnableTitle: add a title to the figure when true.
        SaveGraph: when true, save to
            ``./tmp_GenerateResources/Fix{Fixed}AllTime@{BENCHMARK}.pdf``;
            otherwise show the figure.
    """
    if Fixed == "Process":
        cost_table = return_ErrorRateFixedProcess_AveragePredictCostRealCost(
            benchmark=benchmark, FixProcess=Fix, predict_class=Predict
        )
        GraphTitle = f"ベンチマーク{benchmark}で実行プロセス数を固定"
    else:
        cost_table = return_ErrorRateFixedClass_AveragePredictCostRealCost(
            benchmark=benchmark, FixClass=Fix, predict_process=Predict
        )
        GraphTitle = f"ベンチマーク{benchmark}でベンチマーククラスを固定"

    # NOTE(review): "Process" is passed regardless of `Fixed`; only the number
    # of comma-separated items in each label is used, which the string parser
    # also yields for integer lists.
    by_profile_count = ConvertIndexNameToNumOfProfile(
        inputDF=cost_table, Fixed="Process"
    )
    profile_counts = by_profile_count.index.tolist()
    predict_costs = by_profile_count["PredictCosts"]
    real_costs = by_profile_count["RealCost"]

    plt.figure()
    plt.plot(profile_counts, predict_costs, marker="o", color="blue", label="予測にかかるコスト")
    plt.plot(profile_counts, real_costs, color="red", label="実測にかかるコスト")
    plt.xlabel("使用したプロファイル数")
    plt.ylabel("コスト [秒]")
    # Force integer ticks on the x axis.
    plt.gca().xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    plt.legend(loc="best", borderaxespad=0)
    if EnableTitle:
        plt.title(GraphTitle)
    if not SaveGraph:
        plt.show()
    else:
        filename = f"Fix{Fixed}AllTime@{benchmark.upper()}.pdf"
        plt.savefig(f"./tmp_GenerateResources/{filename}")
# 使用例
# GenGraphTotalTimePerProfileNum(EnableTitle=True)
# GenGraphTotalTimePerProfileNum(benchmark=benchmark, Fixed="Class", Fix="C", Predict=256, EnableTitle=True)
def return_ErrorRateAverageAndCosts(
    benchmark="cg", Fix="B", Predict=256, Fixed="Class"
):
    """Return average error rate and cost, indexed by number of profiles used.

    Args:
        benchmark: benchmark name.
        Fix: the class (``Fixed == "Class"``) or process count held constant.
        Predict: the process count or class being predicted.
        Fixed: "Class" uses the fixed-class table; any other value uses the
            fixed-process table.

    Returns:
        DataFrame indexed by "使用したプロファイル数" with the columns
        "平均誤差率" and "コスト".
    """
    if Fixed == "Class":
        FixDF = return_ErrorRateFixedClassAverageAndCosts(
            benchmark=benchmark, FixClass=Fix, predict_process=Predict
        )
        parse_label = convert_StrListToIntList
    else:
        FixDF = return_ErrorRateFixedProcessAverageAndCosts(
            benchmark=benchmark, FixProcess=Fix, predict_class=Predict
        )
        parse_label = convert_StrListToStrList

    # Each row label is a printed list; its length is the profile count.
    profile_counts = [len(parse_label(label)) for label in FixDF.index.tolist()]
    FixDF = FixDF.reset_index()
    FixDF["index"] = profile_counts
    renamed = FixDF.rename(
        columns={"index": "使用したプロファイル数", "Average": "平均誤差率", "PredictCosts": "コスト"}
    )
    return renamed.set_index("使用したプロファイル数")
# # 使用例
# sampleDF = return_ErrorRateAverageAndCosts(benchmark="cg", Fix="B", Predict=256, Fixed="Class")
# print(sampleDF)
# sampleDF = return_ErrorRateAverageAndCosts(benchmark="cg", Fix=64, Predict="D", Fixed="Process")
# print(sampleDF)
def BestModelsInDF(inputDF):
    """Return a DataFrame holding the model name of every model in ``inputDF``.

    Args:
        inputDF: the return value of return_FixedClassModelDF(); every cell
            must offer a ``ModelName()`` method.

    Returns:
        DataFrame with the same index and columns where each cell holds the
        result of that cell's ``ModelName()``.
    """
    returnDF = pd.DataFrame().reindex_like(inputDF).astype("str")
    column_labels = returnDF.columns.tolist()
    for row_label in returnDF.index.tolist():
        for column_label in column_labels:
            model = inputDF.at[row_label, column_label]
            returnDF.at[row_label, column_label] = model.ModelName()
    return returnDF
# ### 使用例
# benchmark = "mg"
# FixClass = "B"
# sampleDF = return_FixedClassModelDF(benchmark = benchmark, FixClass = FixClass)
# print(BestModelsInDF(inputDF=sampleDF))
# Functions for fetching the raw (measured) data follow.
# Module-level sample values.
# NOTE(review): presumably left over from interactive use -- confirm before
# removing, since functions that read a global of the same name would pick
# these values up.
benchmark = "cg"
benchmarkClasses = ["A", "B", "C", "D"]
FixedProcess = 64
# Same contents as processes_onlyBTSP / processes_excludeBTSP defined earlier,
# but under different names (no trailing "es" on "process").
process_onlyBTSP = [1, 4, 16, 64, 256]
process_excludeBTSP = [1, 2, 4, 8, 16, 32, 64, 128, 256]
functionName = ".TAU_application"
def returnRawDF(
    Benchmark="cg",
    functionName=".TAU_application",
    fix="Process",
    benchmarkClass=["A", "B", "C", "D"],
    FixedProcess=64,
    Processes=[1, 2, 4, 8, 16, 32, 64, 128, 256],
    FixedBenchmarkClass="B",
):
    """Return the measured data of one function as a single-row DataFrame.

    Args:
        Benchmark: Benchmark name.
        functionName: Index label of the row to return.
        fix: ``"Process"`` gathers data via ``return_fixed_process``; any
            other value gathers it via ``return_fixed_class``.
        benchmarkClass: Benchmark classes to load (used when ``fix`` is
            ``"Process"``).
        FixedProcess: The fixed process count (used when ``fix`` is
            ``"Process"``).
        Processes: Process counts to load (used otherwise).
        FixedBenchmarkClass: The fixed benchmark class (used otherwise).

    Returns:
        The ``functionName`` row of the column-wise concatenation of the
        loaded frames, kept as a DataFrame.
    """
    if fix != "Process":
        perConditionFrames = return_fixed_class(
            BenchMark=Benchmark,
            Processes=Processes,
            FixedBenchMarkClass=FixedBenchmarkClass,
        )
    else:
        perConditionFrames = return_fixed_process(
            BenchMark=Benchmark,
            BenchMarkClasses=benchmarkClass,
            FixedProcess=FixedProcess,
        )
    # The list-valued .loc key keeps the result two-dimensional.
    return pd.concat(perConditionFrames, axis=1).loc[[functionName]]
# returnRawDF(fix="Process")
# returnRawDF(fix="Class")
# ベンチマークごとの生データを取得する関数
# 引数は returnRawDF() と基本的に同等
# 異なる部分は functionName が存在しないこと
def returnRawDFperBenchmark(
    Benchmark="cg",
    fix="Process",
    benchmarkClass=["A", "B", "C", "D"],
    FixedProcess=64,
    Processes=[1, 2, 4, 8, 16, 32, 64, 128, 256],
    FixedBenchmarkClass="B",
):
    """Return the measured data of every function of one benchmark.

    Takes the same arguments as ``returnRawDF`` except ``functionName``; no
    row is selected, so the whole table is returned.

    Args:
        Benchmark: Benchmark name.
        fix: ``"Process"`` loads via ``return_fixed_process``; any other
            value loads via ``return_fixed_class``.
        benchmarkClass: Benchmark classes to load when ``fix`` is
            ``"Process"``.
        FixedProcess: Fixed process count when ``fix`` is ``"Process"``.
        Processes: Process counts to load otherwise.
        FixedBenchmarkClass: Fixed benchmark class otherwise.

    Returns:
        The loaded frames concatenated along the columns.
    """
    loadByProcess = fix == "Process"
    if loadByProcess:
        frames = return_fixed_process(
            BenchMark=Benchmark,
            BenchMarkClasses=benchmarkClass,
            FixedProcess=FixedProcess,
        )
        return pd.concat(frames, axis=1)

    frames = return_fixed_class(
        BenchMark=Benchmark,
        Processes=Processes,
        FixedBenchMarkClass=FixedBenchmarkClass,
    )
    return pd.concat(frames, axis=1)
# # 使用例
# returnRawDFperBenchmark(fix="Process")
# returnRawDFperBenchmark(fix="Class")
# 引数:inputDF はreturnRawDF()の返り値
# 引数:repeated はDATA列を繰り返す回数
# 引数:includeLastData は学習に最後の値を使うかどうかを指定
def generateInputFileForExtraP(
    inputDF=returnRawDF(fix="Class"),
    repeated=3,
    includeLastData=False,
    fileName="tmp_functionName.txt",
):
    """Write ``inputDF`` as an Extra-P text input file.

    The layout follows the example at
    https://github.com/extra-p/extrap/blob/master/docs/examples/input.txt

    Args:
        inputDF: Return value of ``returnRawDF()``: rows are functions and
            columns are the measurement points. NOTE: the default is
            evaluated once, when this ``def`` statement executes.
        repeated: How many times each value is repeated on its DATA line.
        includeLastData: When falsy, the last column is left out.
        fileName: Path of the file to write.

    Returns:
        None. The file is written as a side effect.
    """
    columns = inputDF.columns.tolist()
    if not includeLastData:
        columns = columns[:-1]
    pointsLine = "POINTS" + "".join(f" ({column})" for column in columns)

    for functionName in inputDF.index.tolist():
        # NOTE: the file is reopened in "w+" mode for every row, so with
        # several rows only the last function's data remains in the file.
        # The context manager closes the handle even if a conversion or a
        # write below raises.
        with open(fileName, "w+") as tmpfile:
            tmpfile.write("PARAMETER p\n")
            tmpfile.write(pointsLine)
            tmpfile.write("\n\n")
            tmpfile.write(f"REGION {functionName}\n")
            tmpfile.write("METRIC functionCall\n")
            for column in columns:
                content = float(inputDF.at[functionName, column])
                tmpfile.write("DATA" + f" {content}" * repeated + "\n")
# 引数:input_x は横軸に相当する値のリスト
# 引数:input_y は縦軸に相当する値のリスト
# 引数:repeated はDATA列を繰り返す回数
# 引数:includeFirstData は学習に最初の値を使うかどうかを指定
# Argument: includeLastData selects whether the last value is used for training
def generateInputFileForExtraPfromLists(
    input_x=[1, 2, 4, 8, 16, 32, 64, 128, 256],
    input_y=[1.0, 1, 1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
    repeated=3,
    includeFirstData=False,
    includeLastData=False,
    fileName="tmp_functionName.txt",
    regionName=None,
):
    """Write x/y lists as an Extra-P text input file.

    The layout follows the example at
    https://github.com/extra-p/extrap/blob/master/docs/examples/input.txt

    Args:
        input_x: Values for the POINTS line (horizontal axis).
        input_y: Measured values, one DATA line each (vertical axis).
        repeated: How many times each value is repeated on its DATA line.
        includeFirstData: When falsy, the first x/y pair is left out.
        includeLastData: When falsy, the last x/y pair is left out.
        fileName: Path of the file to write.
        regionName: Name written on the REGION line. When ``None`` the
            module-level ``functionName`` is used, which is what this
            function always did before the parameter existed.

    Returns:
        None. The file is written as a side effect.
    """
    if regionName is None:
        # Backward-compatible fallback to the module-level global.
        regionName = functionName

    # Slicing builds new lists, so the caller's (and the default) lists are
    # never modified.
    if not includeLastData:
        input_x = input_x[:-1]
        input_y = input_y[:-1]
    if not includeFirstData:
        input_x = input_x[1:]
        input_y = input_y[1:]

    # The context manager closes the handle even if float() or a write raises.
    with open(fileName, "w+") as tmpfile:
        tmpfile.write("PARAMETER p\n")
        tmpfile.write("POINTS" + "".join(f" ({x})" for x in input_x))
        tmpfile.write("\n\n")
        tmpfile.write(f"REGION {regionName}\n")
        tmpfile.write("METRIC functionCall\n")
        for y in input_y:
            content = float(y)
            tmpfile.write("DATA" + f" {content}" * repeated + "\n")
##### 使用例 #####
##準備##
# inputDF = returnRawDF(fix="Class")
# input_x = inputDF.columns.tolist()
# functionNames = inputDF.index.tolist()
# functionName = functionNames[0]
# input_y = []
# for x in input_x:
# input_y.append(inputDF.at[functionName, x])
##使用##
# generateInputFileForExtraPfromLists(input_x=input_x, input_y=input_y, repeated=3, includeFirstData=False, includeLastData=False, fileName="tmp_functionName.txt")
# モデルの共通部分となるクラス
# すべての引数はただのリスト。クラスの初期化時に""np.reshape()""を実行する
class ModelBase2:
    """Common base of the regression models.

    Every data argument is a plain list (or array-like); it is reshaped to a
    single-column 2-D array with ``np.reshape(..., (-1, 1))`` on construction.

    Args:
        train_x: Explanatory values used for fitting.
        train_y: Observed values used for fitting.
        target_x: Explanatory values of the prediction target.
        target_y: Observed values of the prediction target.
        benchmark_name: Benchmark name, stored as given.
        function_name: Function name, stored as given.
    """

    def __init__(
        self,
        train_x,
        train_y,
        target_x=[],
        target_y=[],
        benchmark_name="benchmark_name",
        function_name="function_name",
    ):
        self.benchmark_name = benchmark_name
        self.function_name = function_name
        self.train_x = np.reshape(train_x, (-1, 1))
        self.train_y = np.reshape(train_y, (-1, 1))
        self.target_x = np.reshape(target_x, (-1, 1))
        # Previously target_x was reshaped a second time here and target_y
        # was never stored, so subclasses reading self.target_y failed.
        self.target_y = np.reshape(target_y, (-1, 1))
# 線形モデルでロバスト回帰を行う
# 作成したModelBase2を継承
class ModelLin_rob(ModelBase2):
    """Linear model fitted with robust (Huber) regression.

    Call ``calc_hr()`` before any other method; it creates ``self.hr``.
    """

    def calc_hr(self):
        """Fit a ``HuberRegressor`` on the training data."""
        self.hr = HuberRegressor()
        # HuberRegressor expects a 1-D target; train_y is stored as a column.
        self.hr.fit(self.train_x, self.train_y.ravel())

    def calc_mape_score(self):
        """Store the MAPE on the target data in ``self.mape_score``.

        NOTE(review): reads ``self.target_x`` / ``self.target_y``, which are
        expected to be provided by the base class -- confirm.
        """
        # The fitted estimator is self.hr; the former self.lr never existed.
        test_y_predicted = self.hr.predict(self.target_x)
        self.mape_score = float(mape_score(self.target_y, test_y_predicted))

    def calc_mape_score_InTrain(self):
        """Store the MAPE on the training data in ``self.mape_score_InTrain``."""
        train_y_predicted = self.hr.predict(self.train_x)
        self.mape_score_InTrain = float(mape_score(self.train_y, train_y_predicted))

    def predict(self, num):
        """Return the prediction for ``num`` (scalar, list or column array)."""
        # Same input normalisation as the *_mk2 models.
        num = np.reshape(num, (-1, 1))
        return self.hr.predict(num)

    def ModelName(self):
        """Return the identifier of this model."""
        return "ModelLin_rob"
# 反比例モデルでロバスト回帰を行う
# ModelBase2を継承
def ip_func(x):
    """Return the reciprocal ``1 / x`` (used as transform and as its inverse)."""
    reciprocal = 1 / x
    return reciprocal
class ModelIp_rob(ModelBase2):
    """Inverse-proportional model fitted with robust (Huber) regression.

    The regression is fitted on ``1 / y`` and predictions are mapped back
    with the same reciprocal. Call ``calc_hr()`` before any other method.
    """

    def calc_hr(self):
        """Fit a ``HuberRegressor`` of ``1 / train_y`` on ``train_x``."""
        self.transformer_ip = sp.FunctionTransformer(func=ip_func, inverse_func=ip_func)
        y_train_ip = self.transformer_ip.transform(self.train_y)
        self.hr = HuberRegressor()
        # HuberRegressor expects a 1-D target.
        self.hr.fit(self.train_x, np.ravel(y_train_ip))

    def calc_mape_score(self):
        """Store the MAPE on the target data in ``self.mape_score``.

        NOTE(review): reads ``self.target_x`` / ``self.target_y``, which are
        expected to be provided by the base class -- confirm.
        """
        # The base class stores the evaluation data as target_*; the former
        # self.test_x / self.test_y attributes are never defined.
        test_y_predicted_ip = self.hr.predict(self.target_x)
        test_y_predicted = self.transformer_ip.inverse_transform(test_y_predicted_ip)
        self.mape_score = float(mape_score(self.target_y, test_y_predicted))

    def calc_mape_score_InTrain(self):
        """Store the MAPE on the training data in ``self.mape_score_InTrain``."""
        train_y_predicted_ip = self.hr.predict(self.train_x)
        train_y_predicted = self.transformer_ip.inverse_transform(train_y_predicted_ip)
        self.mape_score_InTrain = float(mape_score(self.train_y, train_y_predicted))

    def predict(self, num):
        """Return the prediction for ``num`` (scalar, list or column array)."""
        # Same input normalisation as the *_mk2 models.
        num = np.reshape(num, (-1, 1))
        predicted_ip = self.hr.predict(num)
        return self.transformer_ip.inverse_transform(predicted_ip)

    def ModelName(self):
        """Return the identifier of this model."""
        return "ModelIP_rob"
# 対数モデルでロバスト回帰を行う
# ModelBase2を継承
def inverter_log10_func(x):
    """Return ``10 ** x``, the inverse of a base-10 logarithm."""
    restored = 10**x
    return restored
class ModelLog10_rob(ModelBase2):
    """Logarithmic model fitted with robust (Huber) regression.

    Both x and y are transformed with ``log10`` before fitting; predictions
    are mapped back with ``10 ** value``. Call ``calc_hr()`` before any other
    method.
    """

    def calc_hr(self):
        """Fit a ``HuberRegressor`` on ``log10(train_x)`` / ``log10(train_y)``."""
        self.transformer_log10 = sp.FunctionTransformer(
            func=np.log10, inverse_func=inverter_log10_func
        )
        x_train_log10 = self.transformer_log10.transform(self.train_x)
        y_train_log10 = self.transformer_log10.transform(self.train_y)
        self.hr = HuberRegressor()
        # HuberRegressor expects a 1-D target.
        self.hr.fit(x_train_log10, np.ravel(y_train_log10))

    def calc_mape_score(self):
        """Store the MAPE on the target data in ``self.mape_score``.

        NOTE(review): reads ``self.target_x`` / ``self.target_y``, which are
        expected to be provided by the base class -- confirm.
        """
        # The base class stores the evaluation data as target_*; the former
        # self.test_x / self.test_y attributes are never defined.
        test_x_log10 = self.transformer_log10.transform(self.target_x)
        test_y_predicted_log10 = self.hr.predict(test_x_log10)
        test_y_predicted = self.transformer_log10.inverse_transform(
            test_y_predicted_log10
        )
        self.mape_score = float(mape_score(self.target_y, test_y_predicted))

    def calc_mape_score_InTrain(self):
        """Store the MAPE on the training data in ``self.mape_score_InTrain``."""
        train_x_log10 = self.transformer_log10.transform(self.train_x)
        train_y_predicted_log10 = self.hr.predict(train_x_log10)
        train_y_predicted = self.transformer_log10.inverse_transform(
            train_y_predicted_log10
        )
        self.mape_score_InTrain = float(mape_score(self.train_y, train_y_predicted))

    def predict(self, num):
        """Return the prediction for ``num`` (scalar, list or column array)."""
        # Same input normalisation as the *_mk2 models.
        num = np.reshape(num, (-1, 1))
        num_log10 = self.transformer_log10.transform(num)
        predicted_log10 = self.hr.predict(num_log10)
        return self.transformer_log10.inverse_transform(predicted_log10)

    def ModelName(self):
        """Return the identifier of this model."""
        return "ModelLog10_rob"
# 反比例モデルmk2
# ModelBaseを継承した反比例モデルは yの逆数をとっていた。
# しかし、それでは意図したモデルとならないことが判明した。
# そのため、xの逆数をとる、反比例モデルがコレ。
class ModelIp_mk2(ModelBase2):
    """Inverse-proportional model: ordinary least squares on ``1 / x``.

    Unlike the earlier inverse model, the reciprocal is applied to the
    explanatory variable and ``y`` is left untouched.
    """

    def calc_lr(self):
        """Fit ``LinearRegression`` of ``train_y`` on ``1 / train_x``."""
        self.transformer_ip = sp.FunctionTransformer(func=ip_func, inverse_func=ip_func)
        reciprocalTrainX = self.transformer_ip.transform(self.train_x)
        self.lr = LinearRegression()
        self.lr.fit(reciprocalTrainX, self.train_y)

    def predict(self, num):
        """Return the prediction for ``num`` (scalar, list or array)."""
        asColumn = np.reshape(num, (-1, 1))
        return self.lr.predict(self.transformer_ip.transform(asColumn))

    def return_coef_(self):
        """Return the fitted coefficient(s)."""
        return self.lr.coef_

    def return_intercept_(self):
        """Return the fitted intercept."""
        return self.lr.intercept_

    def ModelName(self):
        """Return the identifier of this model."""
        return "ModelIp"
# # 使用例
# modelIpMk2 = ModelIp_mk2(train_x=train_x, train_y=train_y, target_x=target_x, target_y=target_y)
# modelIpMk2.calc_lr()
# plot_y = modelIpMk2.predict(plot_x)
# 対数モデルmk2
# ModelBaseを継承した対数モデルはどこかに不具合がある。
# ModelBase2を継承して、改修した対数モデルがこのモデル。
class ModelLog10_mk2(ModelBase2):
    """Logarithmic model: ordinary least squares on ``log10(x)``."""

    def calc_lr(self):
        """Fit ``LinearRegression`` of ``train_y`` on ``log10(train_x)``."""
        self.transformer_log10 = sp.FunctionTransformer(
            func=np.log10, inverse_func=inverter_log10_func
        )
        logTrainX = self.transformer_log10.transform(self.train_x)
        self.lr = LinearRegression()
        self.lr.fit(logTrainX, self.train_y)

    def predict(self, num):
        """Return the prediction for ``num`` (scalar, list or array)."""
        asColumn = np.reshape(num, (-1, 1))
        return self.lr.predict(self.transformer_log10.transform(asColumn))

    def return_coef_(self):
        """Return the fitted coefficient(s)."""
        return self.lr.coef_

    def return_intercept_(self):
        """Return the fitted intercept."""
        return self.lr.intercept_

    def ModelName(self):
        """Return the identifier of this model."""
        return "ModelLog"
# # 使用例
# modelLog10Mk2 = ModelLog10_mk2(train_x=train_x, train_y=train_y, target_x=target_x, target_y=target_y)
# modelLog10Mk2.calc_lr()
# plot_y = modelLog10Mk2.predict(plot_x)
# 線形モデルmk2
# ModelBase2を継承して、改修したモデル。
class ModelLin_mk2(ModelBase2):
    """Linear model: ordinary least squares of ``y`` on ``x``."""

    def calc_lr(self):
        """Fit ``LinearRegression`` on the training data."""
        self.lr = LinearRegression()
        self.lr.fit(self.train_x, self.train_y)

    def predict(self, num):
        """Return the prediction for ``num`` (scalar, list or array)."""
        asColumn = np.reshape(num, (-1, 1))
        return self.lr.predict(asColumn)

    def return_coef_(self):
        """Return the fitted coefficient(s)."""
        return self.lr.coef_

    def return_intercept_(self):
        """Return the fitted intercept."""
        return self.lr.intercept_

    def ModelName(self):
        """Return the identifier of this model."""
        return "ModelLin"
# 分岐モデルmk2
# ModelBase2を継承して、改修したモデル
class ModelBranch_mk2(ModelBase2):
    """Two-segment ("branch") linear model split at the largest training ``y``.

    ``calc_lr()`` fits two ``LinearRegression`` objects, ``lr1`` and ``lr2``;
    ``predict()`` chooses between them depending on where the requested
    values lie relative to the split point.
    """

    def calc_lr(self):
        """Locate the split point and fit the two segment regressions."""
        # train_y is a single column, so the flattened argmax is the row index
        # of the largest observed value.
        self.t = np.ndarray.argmax(self.train_y)
        # x value at the split point (a one-element array).
        self.t_num = self.train_x[self.t]
        if self.t == 0 or self.t == len(self.train_y) - 1:
            # Maximum at either end: no usable split, so both segments are
            # fitted on the complete training data.
            self.lr1 = LinearRegression()
            self.lr1.fit(self.train_x, self.train_y)
            self.lr2 = LinearRegression()
            self.lr2.fit(self.train_x, self.train_y)
        else:
            # Rows before the split index go to segment 1; the split row and
            # everything after it go to segment 2.
            self.train_x_1 = self.train_x[: self.t]
            self.train_x_2 = self.train_x[self.t :]
            self.train_y_1 = self.train_y[: self.t]
            self.train_y_2 = self.train_y[self.t :]
            self.lr1 = LinearRegression()
            self.lr1.fit(self.train_x_1, self.train_y_1)
            self.lr2 = LinearRegression()
            self.lr2.fit(self.train_x_2, self.train_y_2)

    def predict(self, num):
        """Return predictions for ``num`` (scalar, list or array).

        NOTE(review): the split at ``k`` below looks like it assumes ``num``
        is sorted in ascending order -- confirm with the callers.
        """
        num = np.reshape(num, (-1, 1))
        # Largest requested x value.
        num_t = np.ndarray.argmax(num)
        num_t_max = num[num_t]
        # Index of the requested value closest to the split point.
        k = np.abs(np.asarray(num) - self.t_num).argmin()
        if len(num) == 1 and num_t_max >= self.t_num:
            # A single value at or beyond the split point: segment 2.
            predicted = self.lr2.predict(num)
            return predicted
        elif num_t_max < self.train_x[self.t] or k == 0:
            # Everything lies below the split point, or the closest value is
            # the first one: segment 1 is used for all values.
            predicted = self.lr1.predict(num)
            return predicted
        else:
            # Values before index k use segment 1, the rest use segment 2.
            num_1 = num[:k]
            num_2 = num[k:]
            predicted_1 = self.lr1.predict(num_1)
            predicted_2 = self.lr2.predict(num_2)
            predicted = np.concatenate([predicted_1, predicted_2])
            return predicted

    def ModelName(self):
        """Return the identifier of this model."""
        return "ModelBranch"
def collectFunctionNamesPerBenchmark(
    benchmarkName="cg",
    benchmarkClasses=["A", "B", "C"],
    processes=[1, 2, 4, 8],
    baseDir="./csv_files/",
):
    """Load every available profile CSV of one benchmark into one long frame.

    For each (class, process) pair the file
    ``<baseDir>pprof_<benchmarkName><class><process>.csv`` is read when it
    exists and is not empty. ``Name`` / ``#Call`` are renamed to
    ``functionName`` / ``call`` and three columns describing the run are
    appended.

    Args:
        benchmarkName: Benchmark name used in the file name.
        benchmarkClasses: Benchmark classes to look for.
        processes: Process counts to look for.
        baseDir: Directory prefix; it is concatenated as-is, so it has to end
            with a path separator.

    Returns:
        The row-wise concatenation of all loaded frames.

    Raises:
        ValueError: Raised by ``pd.concat`` when no file could be loaded.
    """
    loadedProfiles = []
    for benchmarkClass in benchmarkClasses:
        for process in processes:
            filePath = (
                baseDir + "pprof_" + benchmarkName + benchmarkClass + str(process) + ".csv"
            )
            # Missing or zero-byte files are skipped.
            if not os.path.exists(filePath) or os.stat(filePath).st_size == 0:
                continue
            profile = pd.read_csv(filePath)
            rowCount = profile.shape[0]
            profile = profile.rename(columns={"Name": "functionName", "#Call": "call"})
            profile["benchmarkClass"] = [
                ConvertBencharkClass_inNPB(benchmarkClass)
            ] * rowCount
            profile["benchmarkName"] = [benchmarkName] * rowCount
            profile["process"] = [str(process)] * rowCount
            loadedProfiles.append(profile)
    return pd.concat(loadedProfiles)
# 実験結果を集計するためのデータフレームのカラムの名称のリストを返す関数
def return_numOfColumns(dataType=False):
    """Return the columns of the result-summary DataFrame.

    Args:
        dataType: When equal to ``True`` a ``{column name: dtype}`` dict is
            returned; otherwise a list of the column names.

    Returns:
        A new list of column names, or a new dict mapping them to dtypes.
    """
    columnSpec = (
        # Benchmark name
        ("benchmarkName", str),
        # Function name
        ("functionName", str),
        # Data used (numeric list, explanatory variable)
        ("usedData_x", object),
        # Data used (numeric list, objective variable)
        ("usedData_y", object),
        # Number of data points used
        ("numOfData", "int16"),
        # What was fixed ("Process" or "Class")
        ("ProcessOrClass", str),
        # The fixed value (process count or numeric problem size)
        ("fixed", "float32"),
        # Process count of the prediction target
        ("targetNumOfProcess", "int16"),
        # Problem size of the prediction target (numeric)
        ("targetNumOfProblemSize", "float32"),
        # Problem size of the prediction target
        ("targetProblemSize", str),
        # Function-call count of the prediction target
        ("targetNumOfFunctionCall", "float32"),
        # Function-call count predicted for the target condition
        ("predictedTargetNumOfFunctionCall", "float32"),
        # Linear model object and its MAPE
        ("objectLinModel", object),
        ("MAPEOfLinModel", "float32"),
        # Inverse-proportional model object and its MAPE
        ("objectIpModel", object),
        ("MAPEOfIpModel", "float32"),
        # Logarithmic model object and its MAPE
        ("objectLogModel", object),
        ("MAPEOfLogModel", "float32"),
        # Linear-saturation (branch) model object and its MAPE
        ("objectBranchModel", object),
        ("MAPEOfBranchModel", "float32"),
        # Name of the model with the smallest error
        ("objectBestModelName", object),
        # Relative error of that model
        ("relativeErrorOfBestModel", "float32"),
    )
    # Kept as an equality test so that only True-equal values select the dict.
    if dataType == True:  # noqa: E712
        return dict(columnSpec)
    return [columnName for columnName, _ in columnSpec]
# 使用例
# columnNames = return_numOfColumns()
# df_sample = pd.DataFrame(columns=columnNames)
# df_sample
# ベンチマーク名・関数名・プロセス数・問題サイズを指定することで、その条件での関数コール回数を取得する関数
def returnSpecificData(
    benchmarkName="cg", functionName=".TAU_application", process=256, benchmarkClass="D"
):
    """Return the measured value of one function under one exact condition.

    ``returnRawDF`` is called with its default ``fix`` and with the single
    class / single process given here; the top-left cell of the resulting
    frame is returned.

    Args:
        benchmarkName: Benchmark name.
        functionName: Function (row label) to look up.
        process: Process count of the condition.
        benchmarkClass: Benchmark class of the condition.

    Returns:
        The value stored at position ``[0, 0]`` of the loaded frame.
    """
    condition = dict(
        Benchmark=benchmarkName,
        functionName=functionName,
        benchmarkClass=[benchmarkClass],
        FixedProcess=process,
        Processes=[process],
        FixedBenchmarkClass=benchmarkClass,
    )
    return returnRawDF(**condition).iat[0, 0]
# returnSpecificData(benchmarkName="mg", functionName="BUBBLE", process=256, benchmarkClass="B")
# return_numOfColumns()でのカラム名としてのモデル名、モデルのメソッドModelName()が返すモデル名を相互的なキー・バリューとした辞書を返す関数
def returnDictModelNames():
    """Return a two-way lookup between result columns and model names.

    The dict maps each model-object column name of ``return_numOfColumns()``
    to the string that model's ``ModelName()`` returns, and each of those
    strings back to its column name.
    """
    columnToModelName = {
        "objectLinModel": "ModelLin",
        "objectIpModel": "ModelIp",
        "objectLogModel": "ModelLog",
        "objectBranchModel": "ModelBranch",
    }
    lookup = dict(columnToModelName)
    # Add the reverse direction: model name -> column name.
    lookup.update({modelName: column for column, modelName in columnToModelName.items()})
    return lookup
# 引数のreal_data, predict_dataから相対誤差を返す関数
# 単位は「%」
def returnRelativeErrorRate(real_data: float, predict_data: float):
    """Return the relative error of ``predict_data`` against ``real_data`` in %.

    Computed as ``abs(real_data - predict_data) / real_data * 100``; the
    divisor is ``real_data`` itself, not its absolute value.
    """
    deviation = abs(real_data - predict_data)
    return deviation / real_data * 100
# 結果を集計するためのDFに挿入するSeriesを作成する関数
def returnSeriesOfData(
    benchmarkName="benhmarkName",
    functionName="functionName",
    raw_x=[1, 2, 3],
    raw_y=[1, 2, 3],
    fix="Class",
    fixed="B",
    targetProcess=256,
    targetProblemSize="B",
):
    """Build one row (a Series) for the result-summary DataFrame.

    Four models (linear, inverse-proportional, logarithmic, branch) are
    fitted on ``raw_x`` / ``raw_y``. Each is scored with ``mape_score`` on
    its own training data, the one with the smallest score is chosen, and its
    prediction for the target condition is compared with the measured value.

    Args:
        benchmarkName: Benchmark name.
        functionName: Function name.
        raw_x: Explanatory values used for fitting.
        raw_y: Measured values used for fitting.
        fix: ``"Class"`` or ``"Process"``; stored in ``ProcessOrClass`` and
            used to choose the x value that is predicted.
        fixed: The fixed value. It is also used as the problem size of the
            prediction target.
        targetProcess: Process count of the prediction target.
        targetProblemSize: Only read when ``fix`` is not ``"Class"``, as the
            x value to predict.

    Returns:
        A Series indexed by ``return_numOfColumns()`` plus the extra label
        ``"MAPEOfBestModel"``.
    """
    record = pd.Series(index=return_numOfColumns(), dtype=object)

    # Identification of the experiment and the data used for fitting.
    record["benchmarkName"] = benchmarkName
    record["functionName"] = functionName
    record["usedData_x"] = raw_x
    record["usedData_y"] = raw_y
    record["numOfData"] = len(raw_y)
    record["ProcessOrClass"] = fix
    record["fixed"] = ConvertBencharkClass_inNPB(fixed)

    # Prediction target.
    # NOTE(review): `fixed`, not `targetProblemSize`, is used as the target
    # problem size here -- confirm this is intended.
    record["targetNumOfProcess"] = targetProcess
    record["targetNumOfProblemSize"] = ConvertBencharkClass_inNPB(fixed)
    record["targetProblemSize"] = fixed
    record["targetNumOfFunctionCall"] = returnSpecificData(
        benchmarkName=benchmarkName,
        functionName=functionName,
        process=targetProcess,
        benchmarkClass=fixed,
    )

    # Fit every candidate model and score it on its own training data.
    # NOTE(review): mape_score() receives (predicted, measured) in that
    # order -- confirm against its signature.
    candidates = (
        ("objectLinModel", "MAPEOfLinModel", ModelLin_mk2),
        ("objectIpModel", "MAPEOfIpModel", ModelIp_mk2),
        ("objectLogModel", "MAPEOfLogModel", ModelLog10_mk2),
        ("objectBranchModel", "MAPEOfBranchModel", ModelBranch_mk2),
    )
    mapeByModelName = {}
    for objectColumn, mapeColumn, modelClass in candidates:
        model = modelClass(train_x=raw_x, train_y=raw_y)
        model.calc_lr()
        fittedValues = model.predict(raw_x)
        record[objectColumn] = model
        record[mapeColumn] = mape_score(fittedValues, raw_y)
        mapeByModelName[record[objectColumn].ModelName()] = record[mapeColumn]

    # Name and score of the best model; on a tie the earliest candidate wins.
    lowestMape = min(mapeByModelName.values())
    record["MAPEOfBestModel"] = lowestMape
    record["objectBestModelName"] = [
        modelName for modelName, score in mapeByModelName.items() if score == lowestMape
    ][0]

    # Predict the target condition with the best model and store its
    # relative error against the measured value.
    bestModelColumn = returnDictModelNames()[record["objectBestModelName"]]
    bestModel = record[bestModelColumn]
    if fix == "Class":
        target_x = targetProcess
    else:
        target_x = ConvertBencharkClass_inNPB(targetProblemSize)
    predictedTargetData = bestModel.predict(target_x).tolist()[0]
    record["predictedTargetNumOfFunctionCall"] = predictedTargetData
    record["relativeErrorOfBestModel"] = returnRelativeErrorRate(
        real_data=record["targetNumOfFunctionCall"],
        predict_data=predictedTargetData,
    )
    return record
# 引数noNaNDFのデータに基づいて各関数での予測精度などを保持したDFを返す関数
# 引数詳細
# benchmark:ベンチマーク名
# noNaNDF:生の実験データから NaN が含まれる関数の行を削除したDF
# targetNumOfProcess:対象となるプロセス数を格納した変数
# targetProblemSize:対象となる問題サイズを格納した変数
# fix:"Process" or "Class"
def return_calculatedDF(
    benchmark: str, noNaNDF, targetNumOfProcess=256, targetProblemSize="B", fix="Class"
):
    """Return per-function prediction results computed from ``noNaNDF``.

    For every function (row) one result row is produced for each prefix of
    the columns, from all columns down to the first two.

    Args:
        benchmark: Benchmark name.
        noNaNDF: Raw measurement frame from which the rows of functions
            containing NaN have been removed.
        targetNumOfProcess: Process count of the prediction target.
        targetProblemSize: Problem size of the prediction target.
        fix: ``"Process"`` or ``"Class"``.

    Returns:
        DataFrame with one row per (function, number of data points used),
        built from the Series returned by ``returnSeriesOfData``. When no row
        is produced, an empty frame with the typed result columns is returned.
    """
    functionNames = noNaNDF.index.tolist()
    processes = noNaNDF.columns.tolist()

    rows = []
    for functionName in functionNames:
        measured = noNaNDF.loc[functionName].tolist()
        # Use the first n, n-1, ..., 2 columns as training data.
        for usedCount in range(len(processes), 1, -1):
            dataSeries = returnSeriesOfData(
                benchmarkName=benchmark,
                functionName=functionName,
                raw_x=processes[:usedCount],
                raw_y=measured[:usedCount],
                fix=fix,
                fixed=targetProblemSize,
                targetProcess=targetNumOfProcess,
                targetProblemSize=targetProblemSize,
            )
            # DataFrame.append() was removed in pandas 2.0. This is the
            # conversion it applied to a Series before concatenating.
            rows.append(dataSeries.to_frame().T.infer_objects())

    if not rows:
        emptyDF = pd.DataFrame(columns=return_numOfColumns())
        return emptyDF.astype(return_numOfColumns(dataType=True))
    return pd.concat(rows, ignore_index=True)
# 集計した各ベンチマークごとにX/Y軸のデータを保持した辞書を返す
def returnDictForPlotPerNumOfUsedData(
    Benchmark=["cg"],
    fix="Class",
    benchmarkClass=["A", "B", "C", "D"],
    FixedProcess=64,
    Processes=[1, 2, 4, 8, 16, 32, 64, 128, 256],
    FixedBenchmarkClass="C",
):
    """Return, per benchmark, the x/y data for the profiles-vs-error plot.

    Args:
        Benchmark: Benchmark names to aggregate.
        fix: ``"Class"`` or ``"Process"``, forwarded to the data loaders.
        benchmarkClass: Benchmark classes to load.
        FixedProcess: Fixed process count.
        Processes: Process counts to load.
        FixedBenchmarkClass: Fixed benchmark class, also used as the target
            problem size.

    Returns:
        ``{benchmark: {"x": [...], "y": [...]}}`` where ``x`` holds the sorted
        numbers of data points used and ``y`` the mean
        ``relativeErrorOfBestModel`` for each of them.
    """
    returnDict = {}
    # Iterate the benchmarks that were passed in; the former loop over the
    # module-level `benchmarks` list ignored the `Benchmark` argument.
    for benchmark in Benchmark:
        rawDF = returnRawDFperBenchmark(
            Benchmark=benchmark,
            fix=fix,
            benchmarkClass=benchmarkClass,
            FixedProcess=FixedProcess,
            Processes=Processes,
            FixedBenchmarkClass=FixedBenchmarkClass,
        )
        calculatedDF = return_calculatedDF(
            benchmark=benchmark,
            noNaNDF=rawDF.dropna(how="any"),
            targetNumOfProcess=256,
            targetProblemSize=FixedBenchmarkClass,
            fix=fix,
        )

        x = sorted(set(calculatedDF["numOfData"].tolist()))
        y = []
        for numOfData in x:
            # Average only the error column: a frame-wide mean() raises on
            # the non-numeric columns with pandas >= 2.0.
            errors = calculatedDF.loc[
                calculatedDF["numOfData"] == numOfData, "relativeErrorOfBestModel"
            ]
            y.append(errors.mean())
        returnDict[benchmark] = {"x": x, "y": y}
    return returnDict
# returnDictForPlotPerNumOfUsedData(Benchmark=benchmarks, fix="Class", benchmarkClass=[
# "A", "B", "C", "D"], FixedProcess=64, Processes=[1, 2, 4, 8, 16, 32, 64, 128, 256], FixedBenchmarkClass="C")
# 相対コストを返す関数(返り値の単位は%)
# variablesToLearn:予測に用いる説明変数のリスト ex1:[1,2,4,8], ex2:["A", "B", "C"]
# variablesToPredict: list of the explanatory variables to predict, ex1:[256], ex2:["D"]
# fixedClassOrProcess:"Class"or"Process"
# fixed:"C"
def returnRelativeCost(
benchmark="cg",
variablesToLearn=[1, 2, 4, 8, 16, 32, 64, 128],
variablesToPredict=[256],
fixedClassOrProcess="Class",
fixed="C",
):
if fixedClassOrProcess == "Class":
pass
else:
pass
# 目標となる環境でのコスト
targetCost = 0
for variableToPredict in variablesToPredict:
targetCost += variableToPredict * return_ExecTime(
benchmark=benchmark, BenchmarkClass=fixed, Process=variableToPredict
)
# 予測するのに必要なコスト
predictCost = 0
for variableToLearn in variablesToLearn:
predictCost += variableToLearn * return_ExecTime(
benchmark=benchmark, BenchmarkClass=fixed, Process=variableToLearn
)
return 100 * predictCost / targetCost
# ipynb形式のライブラリのインポート
%run ./lib/lib.ipynb
DEBUG:__main__:hello
| ベンチマーク名 | 平均誤差率(%) | コスト比(%) |
|---|---|---|
| ベンチマーク名 | 採用割合(最大MAPE(%), 最小MAPE(%)) |
|---|---|
| モデル(1), モデル(2), ... |
# Create a 4 x 3 inch figure and label its axes:
# x = number of profiles used, y = mean error rate (%).
# These are kept as bare expression statements because the notebook echoes
# the value of each one.
plt.figure(figsize=(4, 3))
plt.xlabel("使用したプロファイル数")
plt.ylabel("平均誤差率(%)")
<Figure size 288x216 with 0 Axes>
DEBUG:matplotlib.font_manager:findfont: Matching IPAexGothic:style=normal:variant=normal:weight=normal:stretch=normal:size=10.0.
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBol.ttf', name='STIXNonUnicode', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Bold.ttf', name='DejaVu Serif', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-BoldItalic.ttf', name='DejaVu Serif', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniIta.ttf', name='STIXNonUnicode', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralItalic.ttf', name='STIXGeneral', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBolIta.ttf', name='STIXGeneral', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-BoldOblique.ttf', name='DejaVu Sans Mono', style='oblique', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymBol.ttf', name='STIXSizeOneSym', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmb10.ttf', name='cmb10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerifDisplay.ttf', name='DejaVu Serif Display', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFiveSymReg.ttf', name='STIXSizeFiveSym', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUni.ttf', name='STIXNonUnicode', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmr10.ttf', name='cmr10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymReg.ttf', name='STIXSizeOneSym', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono.ttf', name='DejaVu Sans Mono', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansDisplay.ttf', name='DejaVu Sans Display', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmex10.ttf', name='cmex10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymBol.ttf', name='STIXSizeFourSym', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif.ttf', name='DejaVu Serif', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymReg.ttf', name='STIXSizeFourSym', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Bold.ttf', name='DejaVu Sans', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymBol.ttf', name='STIXSizeTwoSym', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBol.ttf', name='STIXGeneral', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmsy10.ttf', name='cmsy10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymReg.ttf', name='STIXSizeThreeSym', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymReg.ttf', name='STIXSizeTwoSym', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneral.ttf', name='STIXGeneral', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-BoldOblique.ttf', name='DejaVu Sans', style='oblique', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Oblique.ttf', name='DejaVu Sans Mono', style='oblique', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmmi10.ttf', name='cmmi10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmtt10.ttf', name='cmtt10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf', name='DejaVu Sans', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Oblique.ttf', name='DejaVu Sans', style='oblique', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmss10.ttf', name='cmss10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymBol.ttf', name='STIXSizeThreeSym', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Italic.ttf', name='DejaVu Serif', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Bold.ttf', name='DejaVu Sans Mono', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBolIta.ttf', name='STIXNonUnicode', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSerif-Italic.ttf', name='Liberation Serif', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSans-BoldItalic.ttf', name='Liberation Sans', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSerif-Bold.ttf', name='Liberation Serif', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf', name='Liberation Mono', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', name='DejaVu Sans', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf', name='DejaVu Sans Mono', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSans-Italic.ttf', name='Liberation Sans', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', name='DejaVu Sans', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSansNarrow-BoldItalic.ttf', name='Liberation Sans Narrow', style='italic', variant='normal', weight=700, stretch='condensed', size='scalable')) = 11.535
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf', name='DejaVu Serif', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationMono-Italic.ttf', name='Liberation Mono', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf', name='Liberation Sans', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Bold.ttf', name='Liberation Sans Narrow', style='normal', variant='normal', weight=700, stretch='condensed', size='scalable')) = 10.535
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSerif-Bold.ttf', name='DejaVu Serif', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationMono-BoldItalic.ttf', name='Liberation Mono', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf', name='Liberation Serif', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Italic.ttf', name='Liberation Sans Narrow', style='italic', variant='normal', weight=400, stretch='condensed', size='scalable')) = 11.25
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf', name='Liberation Sans Narrow', style='normal', variant='normal', weight=400, stretch='condensed', size='scalable')) = 10.25
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSerif-BoldItalic.ttf', name='Liberation Serif', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf', name='DejaVu Sans Mono', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf', name='Liberation Mono', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf', name='Liberation Sans', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/japanize_matplotlib/fonts/ipaexg.ttf', name='IPAexGothic', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 0.05
DEBUG:matplotlib.font_manager:findfont: Matching IPAexGothic:style=normal:variant=normal:weight=normal:stretch=normal:size=10.0 to IPAexGothic ('/usr/local/lib/python3.10/site-packages/japanize_matplotlib/fonts/ipaexg.ttf') with score of 0.050000.
Text(0.5, 0, '使用したプロファイル数')
Text(0, 0.5, '平均誤差率(%)')
DEBUG:matplotlib.font_manager:findfont: Matching IPAexGothic:style=normal:variant=normal:weight=normal:stretch=normal:size=10.0.
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBol.ttf', name='STIXNonUnicode', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Bold.ttf', name='DejaVu Serif', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-BoldItalic.ttf', name='DejaVu Serif', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniIta.ttf', name='STIXNonUnicode', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralItalic.ttf', name='STIXGeneral', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBolIta.ttf', name='STIXGeneral', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-BoldOblique.ttf', name='DejaVu Sans Mono', style='oblique', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymBol.ttf', name='STIXSizeOneSym', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmb10.ttf', name='cmb10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerifDisplay.ttf', name='DejaVu Serif Display', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFiveSymReg.ttf', name='STIXSizeFiveSym', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUni.ttf', name='STIXNonUnicode', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmr10.ttf', name='cmr10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymReg.ttf', name='STIXSizeOneSym', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono.ttf', name='DejaVu Sans Mono', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansDisplay.ttf', name='DejaVu Sans Display', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmex10.ttf', name='cmex10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymBol.ttf', name='STIXSizeFourSym', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif.ttf', name='DejaVu Serif', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymReg.ttf', name='STIXSizeFourSym', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Bold.ttf', name='DejaVu Sans', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymBol.ttf', name='STIXSizeTwoSym', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBol.ttf', name='STIXGeneral', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmsy10.ttf', name='cmsy10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymReg.ttf', name='STIXSizeThreeSym', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymReg.ttf', name='STIXSizeTwoSym', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneral.ttf', name='STIXGeneral', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-BoldOblique.ttf', name='DejaVu Sans', style='oblique', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Oblique.ttf', name='DejaVu Sans Mono', style='oblique', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmmi10.ttf', name='cmmi10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmtt10.ttf', name='cmtt10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf', name='DejaVu Sans', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Oblique.ttf', name='DejaVu Sans', style='oblique', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/cmss10.ttf', name='cmss10', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymBol.ttf', name='STIXSizeThreeSym', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Italic.ttf', name='DejaVu Serif', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Bold.ttf', name='DejaVu Sans Mono', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBolIta.ttf', name='STIXNonUnicode', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSerif-Italic.ttf', name='Liberation Serif', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSans-BoldItalic.ttf', name='Liberation Sans', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSerif-Bold.ttf', name='Liberation Serif', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf', name='Liberation Mono', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', name='DejaVu Sans', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf', name='DejaVu Sans Mono', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSans-Italic.ttf', name='Liberation Sans', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', name='DejaVu Sans', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSansNarrow-BoldItalic.ttf', name='Liberation Sans Narrow', style='italic', variant='normal', weight=700, stretch='condensed', size='scalable')) = 11.535
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf', name='DejaVu Serif', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationMono-Italic.ttf', name='Liberation Mono', style='italic', variant='normal', weight=400, stretch='normal', size='scalable')) = 11.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf', name='Liberation Sans', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Bold.ttf', name='Liberation Sans Narrow', style='normal', variant='normal', weight=700, stretch='condensed', size='scalable')) = 10.535
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSerif-Bold.ttf', name='DejaVu Serif', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationMono-BoldItalic.ttf', name='Liberation Mono', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf', name='Liberation Serif', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Italic.ttf', name='Liberation Sans Narrow', style='italic', variant='normal', weight=400, stretch='condensed', size='scalable')) = 11.25
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf', name='Liberation Sans Narrow', style='normal', variant='normal', weight=400, stretch='condensed', size='scalable')) = 10.25
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSerif-BoldItalic.ttf', name='Liberation Serif', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf', name='DejaVu Sans Mono', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf', name='Liberation Mono', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf', name='Liberation Sans', style='normal', variant='normal', weight=700, stretch='normal', size='scalable')) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(FontEntry(fname='/usr/local/lib/python3.10/site-packages/japanize_matplotlib/fonts/ipaexg.ttf', name='IPAexGothic', style='normal', variant='normal', weight=400, stretch='normal', size='scalable')) = 0.05
DEBUG:matplotlib.font_manager:findfont: Matching IPAexGothic:style=normal:variant=normal:weight=normal:stretch=normal:size=10.0 to IPAexGothic ('/usr/local/lib/python3.10/site-packages/japanize_matplotlib/fonts/ipaexg.ttf') with score of 0.050000.
# Set the pandas display limits to 200 columns and 200 rows.
# The current values can be inspected with
# pd.get_option("display.max_columns") / pd.get_option("display.max_rows").
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 200)
def returnSpecificData(
    benchmarkName="cg",
    functionName=".TAU_application",
    process=256,
    benchmarkClass="D",
):
    """Return the function call count for one specific condition.

    The benchmark name, function name, process count and problem size
    (benchmark class) are forwarded to ``returnRawDF`` with the process and
    the class each given both as the fixed value and as a one-element list.
    The cell at row 0, column 0 of the returned frame is the result.

    Args:
        benchmarkName: Benchmark name, passed as ``Benchmark``.
        functionName: Name of the profiled function.
        process: Process count, passed as ``FixedProcess`` and ``Processes``.
        benchmarkClass: Problem size, passed as ``benchmarkClass`` and
            ``FixedBenchmarkClass``.

    Returns:
        The value stored at position ``[0, 0]`` of the frame from
        ``returnRawDF``.
    """
    lookup = {
        "Benchmark": benchmarkName,
        "functionName": functionName,
        "benchmarkClass": [benchmarkClass],
        "FixedProcess": process,
        "Processes": [process],
        "FixedBenchmarkClass": benchmarkClass,
    }
    return returnRawDF(**lookup).iat[0, 0]
# returnSpecificData(benchmarkName="mg", functionName="BUBBLE", process=256, benchmarkClass="B")
# Benchmark names other than bt and sp
benchmarks = ["cg", "ep", "ft", "is", "lu", "mg"]
# Widen the line width used when a pandas DataFrame is printed
pd.set_option("display.width", 100)

plt.figure(figsize=(5.72, 4), dpi=200)

# Draw the model that Extra-P produced from the fixProcess input data.
# Another formula that was left next to it in the original notes:
# -3590464.6990329633 + 3759195.349891038 * p^(1/4)
plot_x = np.linspace(0.8, 256, 500)
# Evaluate the curve for all sample points in one vectorised expression.
plot_y = 2286768.3333333326 + 301997.61904761934 * np.log2(plot_x)
plt.plot(plot_x, plot_y, label="ExtraP")

# Function-call counts used for fitting, one per core count.
x = [1, 2, 4, 8, 16, 32, 64, 128]
y = [
    1984770.0,
    2263540.0,
    2821070.0,
    3936140.0,
    3936140.0,
    3936140.0,
    3936140.0,
    3936140.0,
]
# The model classes are fed column vectors of shape (n, 1).
x = np.array(x).reshape(-1, 1)
y = np.array(y).reshape(-1, 1)
plt.scatter(x, y, marker="o", label="予測に用いた関数コール回数")
plot_x = np.array(plot_x).reshape(-1, 1)

# Measured value at the core count that is to be predicted.
x_target = [256]
y_target = [3936140]
plt.scatter(x_target, y_target, marker="o", label="予測したい関数コール回数の実測値")

benchmarkName = "CG"
functionName = "ICNVRT"

# Inverse-proportional model
# (the linear and logarithmic models only exist in the commented-out code below)
modelIpMk2 = ModelIp_mk2(
    train_x=x,
    train_y=y,
    target_x=x_target,
    target_y=y_target,
    benchmark_name=benchmarkName,
    function_name=functionName,
)
modelIpMk2.calc_lr()
plot_y_IpMk2 = modelIpMk2.predict(plot_x)
plt.plot(plot_x, plot_y_IpMk2, label="反比例モデル")

# Linear-saturation model
modelBranchMk2 = ModelBranch_mk2(
    train_x=x,
    train_y=y,
    target_x=x_target,
    target_y=y_target,
    benchmark_name=benchmarkName,
    function_name=functionName,
)
modelBranchMk2.calc_lr()
plot_y_BranchMk2 = modelBranchMk2.predict(plot_x)
plt.plot(plot_x, plot_y_BranchMk2, label="線形飽和モデル")

# # Linear model
# model_lin = ModelLin(x, y, "CG", "ICNVRT", test_ratio=0)
# model_lin.calc_lr()
# plot_y_lin = model_lin.predict(plot_x)
# plt.plot(plot_x, plot_y_lin, label="線形モデル")
# # Logarithmic model
# model_log10 = ModelLog10(x, y, "CG", "ICNVRT", test_ratio=0)
# model_log10.calc_lr()
# plot_y_log10 = model_log10.predict(plot_x)
# plt.plot(plot_x, plot_y_log10, label="対数モデル")
# # Inverse-proportional model
# model_ip = ModelIP(x, y, "CG", "ICNVRT", test_ratio=0)
# model_ip.calc_lr()
# plot_y_ip = model_ip.predict(plot_x)
# plt.plot(plot_x, plot_y_ip, label="反比例モデル")
# # Linear-saturation model
# model_branch = ModelBranch(x, y, "CG", "ICNVRT", test_ratio=0)
# model_branch.calc_lr()
# plot_y_branch = model_branch.predict(plot_x)
# plt.plot(plot_x, plot_y_branch, label="線形飽和モデル")

# Show the legend
plt.legend()
# Axis labels
plt.ylabel("関数コール回数")
plt.xlabel("実行コア数")
# NOTE: the fitting points are drawn exactly once (above, with a legend
# entry). Scattering them again here without a label would paint them over
# in the next colour of the cycle, so the markers would no longer match the
# colour shown for them in the legend.
<Figure size 1144x800 with 0 Axes>
[<matplotlib.lines.Line2D at 0x7fd869375c90>]
<matplotlib.collections.PathCollection at 0x7fd869375a20>
<matplotlib.collections.PathCollection at 0x7fd869376170>
[<matplotlib.lines.Line2D at 0x7fd8693766b0>]
[<matplotlib.lines.Line2D at 0x7fd8693769e0>]
<matplotlib.legend.Legend at 0x7fd8693765c0>
Text(0, 0.5, '関数コール回数')
Text(0.5, 0, '実行コア数')
<matplotlib.collections.PathCollection at 0x7fd869377940>
# Actually plot
# print(f"fix={fix}, benchmarkClasses={benchmarkClasses}, fixedProcess={fixedProcess}, Processes={processes}, FixedBenchmarkClass={fixedBenchmarkClass}")
# print(f"targetNumOfProcess={targetNumOfProcess}, targetProblemSize={fixedBenchmarkClass}, fix={fix}")
# DF = returnRawDFperBenchmark(Benchmark="mg", fix="Process", benchmarkClass=["A", "B", "C", "D"], Processes=[
# 1, 2, 4, 8, 16, 32, 64, 128, 256], FixedBenchmarkClass="B", FixedProcess=64)
# DF.dropna(how='any')
# DF

# Conditions handed to returnDFSummarizedData.
csvDirPath = "./csv_files/"
targetIndex = -1
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# classes = ["A", "B", "C", "D"]
classes = ["B"]
benchmarkNamesExcludeBTSP = ["cg", "ep", "ft", "is", "lu", "mg"]

dfByDatumExcludeBTSP = returnDFSummarizedData(
    benchmarkNames=benchmarkNamesExcludeBTSP,
    classes=classes,
    processes=processes,
    targetIndex=targetIndex,
    csvDirPath=csvDirPath,
)
# dfByDatumExcludeBTSP

# Split the summary by benchmark name and count the rows over all benchmarks.
dictForLatexTable = {}
numOfData = 0
for benchmarkName in benchmarkNamesExcludeBTSP:
    belongsToBenchmark = dfByDatumExcludeBTSP["benchmarkName"] == benchmarkName
    rowsOfBenchmark = dfByDatumExcludeBTSP[belongsToBenchmark]
    dictForLatexTable[benchmarkName] = rowsOfBenchmark
    numOfData += len(rowsOfBenchmark)
numOfData
156
# Collect what returnSeriesOfDatumPerBenchmark returns for each benchmark,
# stack the results into one DataFrame and print it as a LaTeX table.
listForDF = []
for benchmarkName in benchmarkNamesExcludeBTSP:
    dfOfBenchmark = dictForLatexTable[benchmarkName]
    listForDF.append(returnSeriesOfDatumPerBenchmark(inputDF=dfOfBenchmark))
DF = pd.DataFrame(listForDF)
latexTable = DF.to_latex(index=False)
print(latexTable)
\begin{tabular}{lllll}
\toprule
benchmarkName & ModelIp & ModelLog & ModelBranch & ModelLin \\
\midrule
CG(22) & 0(-) & 13(11.7,11.7) & 18(0.0,0.0) & 69(0.0,0.0) \\
EP(9) & 0(-) & 0(-) & 0(-) & 100(0.0,0.0) \\
FT(32) & 9(0.0,0.0) & 0(-) & 0(-) & 91(0.0,0.0) \\
IS(21) & 19(0.0,0.0) & 0(-) & 0(-) & 81(0.0,0.0) \\
LU(42) & 30(0.0,25.9) & 0(-) & 0(-) & 70(0.0,0.0) \\
MG(30) & 0(-) & 3(0.5,0.5) & 0(-) & 97(0.0,3.4) \\
\bottomrule
\end{tabular}
/tmp/ipykernel_137/3341568465.py:8: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. print(DF.to_latex(index=False))
test_returnSeriesOfDatumPerBenchmark()

# Look at the "is" rows whose best model is ModelBranch and pull out the
# data that was used for them.
resultIs = dictForLatexTable["is"]
# resultIs
isBestModelBranch = resultIs["objectBestModelName"] == "ModelBranch"
resultIsAtModelBranch = resultIs.loc[isBestModelBranch]
datumX = resultIsAtModelBranch["usedDataX"].tolist()
datumY = resultIsAtModelBranch["usedDataY"].tolist()
datumX
datumY
# returnSeriesOfData(benchmarkName="is", functionName="double_randlc(double_*_double_*)", rawX=dataX, rawY=dataY, fixProcessOrClass="Class", fixed="B", targetProcess=256, targetBenchmarkClass="B", targetFunctionCallNum=-1, csvDirPath="./csv_files")
[]
[]
# Look at the "ft" rows whose best model is ModelBranch and whose MAPE is
# above 1, then scatter-plot the data used for each of those rows.
# (The variable names keep the "resultIs" prefix although the data is "ft".)
resultIs = dictForLatexTable["ft"]
# resultIs
isBestModelBranch = resultIs["objectBestModelName"] == "ModelBranch"
resultIsAtModelBranch = resultIs.loc[isBestModelBranch]
hasNotLowMAPE = resultIsAtModelBranch["MAPEOfBestModel"] > 1
resultIsAtModelBranchOfNotLowMAPE = resultIsAtModelBranch.loc[hasNotLowMAPE]
resultIsAtModelBranchOfNotLowMAPE
datumX = resultIsAtModelBranchOfNotLowMAPE["usedDataX"].tolist()
datumY = resultIsAtModelBranchOfNotLowMAPE["usedDataY"].tolist()
datumX
datumY
# One figure per selected row.
for usedX, usedY in zip(datumX, datumY):
    plt.figure()
    plt.scatter(usedX, usedY)
# returnSeriesOfData(benchmarkName="is", functionName="double_randlc(double_*_double_*)", rawX=dataX, rawY=dataY, fixProcessOrClass="Class", fixed="B", targetProcess=256, targetBenchmarkClass="B", targetFunctionCallNum=-1, csvDirPath="./csv_files")
| benchmarkName | functionName | usedDataX | usedDataY | numOfData | ProcessOrClass | fixed | targetProcess | targetProblemSize | targetNumOfFunctionCall | objectLinModel | MAPEOfLinModel | objectIpModel | MAPEOfIpModel | objectLogModel | MAPEOfLogModel | objectBranchModel | MAPEOfBranchModel | objectBestModelName | MAPEOfBestModel | RelativeErrorRate |
|---|
[]
[]
# Clear the interactive namespace (-f: do not ask for confirmation)
%reset -f
# Enable the setting that displays a DataFrame nicely when a notebook cell contains only the variable
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Import the library that is kept in ipynb format
%run ./lib/lib.ipynb
# Variable that stores the name of the directory holding the raw-data CSV files
csvDirPath = "./csv_files/"
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
# TODO: put the benchmark names other than BT and SP here
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
classes = ["A", "B", "C", "D"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]

dictForSummarizedResult = {}
columnName = ["benchmarkName", "functionName", "score", "relativeErrorRate"]
# One dict per (benchmark, function). The result frame is built from this
# list once, after the loops, instead of growing a DataFrame row by row with
# DataFrame.append (deprecated, removed in pandas 2.0, and quadratic).
rowsForSummarizedResult = []

for benchmarkName in benchmarkNames:
    # List that holds the scores of this benchmark
    listForSummarizedResultPerBenchmarkName = []
    # Raw data
    DF = returnCollectedExistingData(
        benchmarkNames=[benchmarkName],
        classes=classes,
        processes=processes,
        csvDirPath="./csv_files/",
    )
    # List of function names without duplicates
    functionNames = list(set(DF["functionName"]))
    # Keep only the functions whose number of rows equals
    # (number of problem sizes) x (number of core counts); the others are ignored.
    usefulFunctionNames = []
    for functionName in functionNames:
        dfPerFunction = DF[DF["functionName"] == functionName]
        if len(classes) * len(processes) == len(dfPerFunction):
            usefulFunctionNames.append(functionName)
    if len(usefulFunctionNames) == 0:
        continue

    # Add a column with the problem size converted to a number. It does not
    # depend on the function, so it is computed once per benchmark.
    listBenchmarkClass = DF["benchmarkClass"].tolist()
    DFWithNumInBenchmarkClass = DF.assign(
        benchmarkClassInNum=convertBenchmarkClasses_problemSizeInNPB(
            listBenchmarkClass
        )
    )

    for functionName in usefulFunctionNames:
        isTargetFunction = DFWithNumInBenchmarkClass["functionName"] == functionName
        # Training data: every row of this function.
        # NOTE(review): this includes the class "D" / 256-process row that is
        # used as the test point below — confirm that is intended.
        dfPerFunctionForTrain = DFWithNumInBenchmarkClass[isTargetFunction]
        # Test data: the class "D", 256-process row of this function.
        dfPerFunctionForTest = DFWithNumInBenchmarkClass[
            isTargetFunction
            & (DFWithNumInBenchmarkClass["benchmarkClass"] == "D")
            & (DFWithNumInBenchmarkClass["process"] == 256)
        ]
        # x: explanatory variables, t: target variable
        trainX = dfPerFunctionForTrain[["process", "benchmarkClassInNum"]]
        trainT = dfPerFunctionForTrain[["functionCallNum"]]
        testX = dfPerFunctionForTest[["process", "benchmarkClassInNum"]]
        testT = dfPerFunctionForTest[["functionCallNum"]]
        # Multiple linear regression
        reg_model = LinearRegression()
        reg_model.fit(trainX, trainT)
        # Score on the training data, also recorded per benchmark
        scorePerFunction = reg_model.score(trainX, trainT)
        listForSummarizedResultPerBenchmarkName.append(scorePerFunction)
        # Predict the test point and compute the relative error in percent
        predictedTByTestX = reg_model.predict(testX)
        predictedData = predictedTByTestX[0][0]
        realData = testT["functionCallNum"].tolist()[0]
        relativeErrorPerFunction = abs(predictedData - realData) / realData * 100
        rowsForSummarizedResult.append(
            {
                "benchmarkName": benchmarkName,
                "functionName": functionName,
                "score": scorePerFunction,
                "relativeErrorRate": relativeErrorPerFunction,
            }
        )

# Passing columns= keeps the expected columns even when no row was collected.
dfForSummarizedResult = pd.DataFrame(rowsForSummarizedResult, columns=columnName)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
LinearRegression()
/tmp/ipykernel_137/3138100627.py:75: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. dfForSummarizedResult = dfForSummarizedResult.append(dfPerFunction)
# Display the accumulated per-function results
dfForSummarizedResult
| benchmarkName | functionName | score | relativeErrorRate | |
|---|---|---|---|---|
| 0 | cg | CG | 1.0 | 0.0 |
| 0 | cg | VECSET | 0.632067 | 19.005114 |
| 0 | cg | SPARSE | 1.0 | 0.0 |
| 0 | cg | MPI_Reduce() | 1.0 | 0.0 |
| 0 | cg | MAKEA | 1.0 | 0.0 |
| 0 | cg | CONJ_GRAD | 0.355806 | 0.012929 |
| 0 | cg | ALLOC_SPACE | 1.0 | 0.0 |
| 0 | cg | MPI_Finalize() | 1.0 | 0.0 |
| 0 | cg | MPI_Barrier() | 1.0 | 0.0 |
| 0 | cg | MPI_Comm_size() | 1.0 | 0.0 |
| 0 | cg | ICNVRT | 0.630252 | 19.304067 |
| 0 | cg | MPI_Send() | 0.484017 | 1.208868 |
| 0 | cg | INITIALIZE_MPI | 1.0 | 0.0 |
| 0 | cg | MPI_Comm_rank() | 1.0 | 0.0 |
| 0 | cg | MPI_Irecv() | 0.484017 | 1.208868 |
| 0 | cg | .TAU_application | 1.0 | 0.0 |
| 0 | cg | MPI_Init() | 1.0 | 0.0 |
| 0 | cg | SETUP_SUBMATRIX_INFO | 1.0 | 0.0 |
| 0 | cg | MPI_Wait() | 0.484017 | 1.208868 |
| 0 | cg | MPI_Bcast() | 1.0 | 0.0 |
| 0 | cg | SETUP_PROC_INFO | 1.0 | 0.0 |
| 0 | cg | SPRNVC | 0.632067 | 19.005114 |
| 0 | ep | MPI_Barrier() | 1.0 | 0.0 |
| 0 | ep | EMBAR | 1.0 | 0.0 |
| 0 | ep | MPI_Comm_size() | 1.0 | 0.0 |
| 0 | ep | .TAU_application | 1.0 | 0.0 |
| 0 | ep | MPI_Init() | 1.0 | 0.0 |
| 0 | ep | MPI_Bcast() | 1.0 | 0.0 |
| 0 | ep | MPI_Allreduce() | 1.0 | 0.0 |
| 0 | ep | MPI_Comm_rank() | 1.0 | 0.0 |
| 0 | ep | MPI_Finalize() | 1.0 | 0.0 |
| 0 | ft | FFTZ2 | 0.347159 | 1655.847686 |
| 0 | ft | MPI_Alltoall() | 0.391709 | 6.076808 |
| 0 | ft | MPI_Reduce() | 0.381319 | 1.267384 |
| 0 | ft | VERIFY | 1.0 | 0.0 |
| 0 | ft | COMPUTE_INITIAL_CONDITIONS | 1.0 | 0.0 |
| 0 | ft | FFT | 0.381319 | 1.173504 |
| 0 | ft | MPI_Finalize() | 1.0 | 0.0 |
| 0 | ft | ALLOC_SPACE | 1.0 | 0.0 |
| 0 | ft | MPI_Barrier() | 1.0 | 0.0 |
| 0 | ft | MPI_Comm_size() | 1.0 | 0.0 |
| 0 | ft | CFFTS1 | 0.388327 | 3.625156 |
| 0 | ft | SET_CLASS | 0.268232 | 1890.922677 |
| 0 | ft | COMPUTE_INDEXMAP | 1.0 | 0.0 |
| 0 | ft | MPI_Comm_rank() | 1.0 | 0.0 |
| 0 | ft | IPOW46 | 1.0 | 0.0 |
| 0 | ft | EVOLVE | 0.381319 | 1.267384 |
| 0 | ft | .TAU_application | 1.0 | 0.0 |
| 0 | ft | ILOG2 | 0.381319 | 1.145227 |
| 0 | ft | MPI_Init() | 1.0 | 0.0 |
| 0 | ft | MPI_Comm_split() | 1.0 | 0.0 |
| 0 | ft | CHECKSUM | 0.381319 | 1.267384 |
| 0 | ft | CFFTZ | 0.345678 | 1624.48595 |
| 0 | ft | FT | 1.0 | 0.0 |
| 0 | ft | MPI_Bcast() | 1.0 | 0.0 |
| 0 | ft | FFT_INIT | 1.0 | 0.0 |
| 0 | ft | SETUP | 1.0 | 0.0 |
| 0 | is | MPI_Finalize() | 1.0 | 0.0 |
| 0 | is | MPI_Comm_size() | 1.0 | 0.0 |
| 0 | is | MPI_Comm_rank() | 1.0 | 0.0 |
| 0 | is | int_main(int_char_**) | 1.0 | 0.0 |
| 0 | is | .TAU_application | 1.0 | 0.0 |
| 0 | is | MPI_Init() | 1.0 | 0.0 |
| 0 | lu | MPI_Finalize() | 1.0 | 0.0 |
| 0 | lu | MPI_Comm_size() | 1.0 | 0.0 |
| 0 | lu | MPI_Send() | 0.833516 | 8.797387 |
| 0 | lu | MPI_Allreduce() | 0.017632 | 0.307316 |
| 0 | lu | MPI_Comm_rank() | 1.0 | 0.0 |
| 0 | lu | MPI_Irecv() | 0.435742 | 9.363135 |
| 0 | lu | .TAU_application | 1.0 | 0.0 |
| 0 | lu | MPI_Init() | 1.0 | 0.0 |
| 0 | lu | MPI_Wait() | 0.435742 | 9.363135 |
| 0 | lu | MPI_Bcast() | 0.017632 | 2.765847 |
inputDF = dfForSummarizedResult
# unique() keeps first-appearance order, so the table rows come out in the
# same order on every run (iteration order of a set of strings is not stable
# across interpreter runs).
benchmarkNamesInDF = inputDF["benchmarkName"].unique().tolist()
# Columns that are averaged per benchmark.
numericColumns = ["score", "relativeErrorRate"]
listForLatexTable = []
for benchmarkName in benchmarkNamesInDF:
    print(benchmarkName)
    inputDFPerBenchmark = inputDF[inputDF["benchmarkName"] == benchmarkName]
    # Average only the numeric columns instead of relying on mean() silently
    # dropping the string columns (deprecated; a TypeError in pandas 2).
    # astype(float) also covers the case where the columns have object dtype.
    meanData = inputDFPerBenchmark[numericColumns].astype(float).mean()
    print(type(meanData))
    # Label of the row: upper-cased benchmark name and the number of functions.
    meanData["benchmarkName"] = f"{benchmarkName.upper()}({len(inputDFPerBenchmark)})"
    listForLatexTable.append(meanData)
DF = pd.DataFrame(listForLatexTable)
# Alphabetical column order: benchmarkName, relativeErrorRate, score.
DF = DF.sort_index(axis="columns")
DF
# The score values are the ones obtained from score().
# NOTE(review): the original note here said the relative error is not
# expressed in [%], yet the cell that fills dfForSummarizedResult multiplies
# the ratio by 100 — confirm the unit before using the table.
# The headers below are assigned in the sorted column order above.
DF.columns = ["ベンチマーク名(関数の個数)", "MAPE(予測対象関数コール回数に対する)", "決定係数"]
print(DF.to_latex(index=False))
ft <class 'pandas.core.series.Series'> ep <class 'pandas.core.series.Series'> lu <class 'pandas.core.series.Series'> is <class 'pandas.core.series.Series'> cg <class 'pandas.core.series.Series'>
/tmp/ipykernel_137/732221415.py:10: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. meanData = inputDFPerBenchmark.mean() /tmp/ipykernel_137/732221415.py:10: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. meanData = inputDFPerBenchmark.mean() /tmp/ipykernel_137/732221415.py:10: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. meanData = inputDFPerBenchmark.mean() /tmp/ipykernel_137/732221415.py:10: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. meanData = inputDFPerBenchmark.mean() /tmp/ipykernel_137/732221415.py:10: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. meanData = inputDFPerBenchmark.mean()
| benchmarkName | relativeErrorRate | score | |
|---|---|---|---|
| 0 | FT(26) | 199.503045 | 0.755681 |
| 1 | EP(9) | 0.000000 | 1.000000 |
| 2 | LU(10) | 3.059682 | 0.674026 |
| 3 | IS(6) | 0.000000 | 1.000000 |
| 4 | CG(22) | 2.770629 | 0.850102 |
\begin{tabular}{lrr}
\toprule
ベンチマーク名(関数の個数) & MAPE(予測対象関数コール回数に対する) & 決定係数 \\
\midrule
FT(26) & 199.503045 & 0.755681 \\
EP(9) & 0.000000 & 1.000000 \\
LU(10) & 3.059682 & 0.674026 \\
IS(6) & 0.000000 & 1.000000 \\
CG(22) & 2.770629 & 0.850102 \\
\bottomrule
\end{tabular}
/tmp/ipykernel_137/732221415.py:20: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. print(DF.to_latex(index=False))
# Experiment configuration. The prediction target is the last class and the
# last process count of the lists below.
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
classes = ["A", "B", "C", "D"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
targetClass = classes[-1]
targetProcess = processes[-1]
# Raw data used for training
DF = returnCollectedExistingData(
    benchmarkNames=benchmarkNames,
    classes=classes,
    processes=processes,
    csvDirPath="./csv_files/",
)
DFByValidFunction = returnDFwithFunctionsExecUnderAllConditions(
    inputDF=DF, classes=classes, processes=processes
)
# Shaped DF: the raw data plus a column holding the problem size as a number
listBenchmarkClass = DFByValidFunction["benchmarkClass"].tolist()
shapedDF = DFByValidFunction.assign(
    benchmarkClassInNum=convertBenchmarkClasses_problemSizeInNPB(listBenchmarkClass)
)
# Column names of the explanatory variables
expVarColNames = ["process", "benchmarkClassInNum"]
# Column names of the response variable
resVarColNames = ["functionCallNum"]
# Benchmark names actually present in the data
benchmarkNames = set(shapedDF["benchmarkName"].tolist())
# Pre-aggregation data, keyed by benchmark name.
# The per-benchmark DF has the columns
# [functionName | benchmarkName | expVarDatumDict | resVarDatumDict | modelsName | dictAggregateResult]
dictToMakeSummary = {}
for benchmarkName in benchmarkNames:
    DFperBenchmark = shapedDF[shapedDF["benchmarkName"] == benchmarkName]
    # Names of the functions that were executed under every condition
    validFunctionNames = list(set(DFperBenchmark["functionName"].tolist()))
    listToMakeDF = []
    print(f"benchmakName={benchmarkName}, 関数の個数:{len(validFunctionNames)}")
    for validFunctionName in validFunctionNames:
        # Rows of this function within this benchmark
        # (the original note says models() is used to build the three models at once)
        isTargetFunction = DFperBenchmark["functionName"] == validFunctionName
        isTargetBenchmark = DFperBenchmark["benchmarkName"] == benchmarkName
        inputDFperFunction = DFperBenchmark[
            isTargetFunction & isTargetBenchmark
        ].reset_index()
        # Rows measured under the target class / target process count
        isTargetClass = inputDFperFunction["benchmarkClass"] == targetClass
        isTargetProcess = inputDFperFunction["process"] == targetProcess
        targetDFperFunction = inputDFperFunction[isTargetClass & isTargetProcess]
        # Fresh column-name lists for every function
        expVarColNames = ["process", "benchmarkClassInNum"]
        resVarColNames = ["functionCallNum"]
        # Functions without a measurement under the target condition are
        # reported and skipped.
        if len(targetDFperFunction) == 0:
            print(f"benchmarkName={benchmarkName}, functionName={validFunctionName}")
            continue
        # Build the models in one go
        returnedDF = returnDFtoMakeSummary(
            inputDF=inputDFperFunction,
            benchmarkName=benchmarkName,
            validFunctionName=validFunctionName,
            targetClass=targetClass,
            targetProcess=targetProcess,
            expVarColNames=expVarColNames,
            resVarColNames=resVarColNames,
        )
        listToMakeDF.append(returnedDF)
    # Nothing usable for this benchmark: leave it out of the summary.
    if not listToMakeDF:
        continue
    inputDFtoMakeDFperBenchmark = pd.concat(listToMakeDF).reset_index(drop=True)
    returnedDict = convertDictToMakeSummary(
        inputDF=inputDFtoMakeDFperBenchmark,
        modelAdoptionRate=True,
        averageRelativeError=True,
    )
    dictToMakeSummary[benchmarkName] = returnedDict
# TODO: run the aggregation function with the created DF as input
# Inspect the result of the last benchmark that was summarized.
modelAdoptionRate = returnedDict["modelAdoptionRate"]
averageRelativeError = returnedDict["averageRelativeError"]
returnedDict
modelAdoptionRate
averageRelativeError
benchmakName=ft, 関数の個数:13 benchmakName=ep, 関数の個数:1 benchmakName=lu, 関数の個数:8 benchmarkName=lu, functionName=void_create_seq(double_double) benchmarkName=lu, functionName=double_randlc(double_*_double_*) benchmarkName=lu, functionName=MPI_Alltoallv() benchmarkName=lu, functionName=void_alloc_space(void) benchmarkName=lu, functionName=double_find_my_seed(int_int_long_double_double) benchmarkName=lu, functionName=MPI_Comm_dup() benchmarkName=lu, functionName=void_full_verify(void) benchmarkName=lu, functionName=void_rank(int) benchmakName=is, 関数の個数:8 benchmakName=cg, 関数の個数:10
{'modelAdoptionRate': {'modelLin': {'count': 7,
'min': array([0.]),
'max': array([76.36553853])},
'modelIp': {'count': 1,
'min': array([18.97934597]),
'max': array([18.97934597])},
'modelLog': {'count': 2,
'min': array([37.28698245]),
'max': array([37.28698245])}},
'averageRelativeError': 10.8345}
{'modelLin': {'count': 7, 'min': array([0.]), 'max': array([76.36553853])},
'modelIp': {'count': 1,
'min': array([18.97934597]),
'max': array([18.97934597])},
'modelLog': {'count': 2,
'min': array([37.28698245]),
'max': array([37.28698245])}}
10.8345
def _truncateToOneDecimal(value):
    """Return *value* as a float truncated (not rounded) to one decimal place.

    *value* may be a plain scalar or a one-element NumPy array.  ``.item()``
    is used for the conversion because calling ``float()`` on an array with
    ndim > 0 is deprecated since NumPy 1.25.
    """
    return int(np.asarray(value, dtype=float).item() * 10) / 10


def _mapeRange(modelStats):
    """Return the (min, max) MAPE of one model, or ("-", "-") if it was never adopted."""
    if modelStats["count"] == 0:
        return "-", "-"
    return (
        _truncateToOneDecimal(modelStats["min"]),
        _truncateToOneDecimal(modelStats["max"]),
    )


columnBenchmarkName = []
# Adoption rate per model
columnAdoptionRateLog = []
columnAdoptionRateIp = []
columnAdoptionRateLin = []
# Average relative error rate
columnAverageRelativeError = []
for benchmarkName in dictToMakeSummary.keys():
    # Adoption statistics of this benchmark
    modelAdoptionRate = dictToMakeSummary[benchmarkName]["modelAdoptionRate"]
    modelNames = list(modelAdoptionRate.keys())
    # Number of functions = sum of the adoption counts over all models
    numOfFunctions = 0
    for modelName in modelNames:
        numOfFunctions += modelAdoptionRate[modelName]["count"]
    dictToMakeSummary[benchmarkName]["関数の個数"] = numOfFunctions
    # The code below assumes exactly three models (modelLog, modelIp, modelLin).
    # modelLog
    adoptionRateLog = int(modelAdoptionRate["modelLog"]["count"] / numOfFunctions * 100)
    logMinMAPE, logMaxMAPE = _mapeRange(modelAdoptionRate["modelLog"])
    # modelIp
    adoptionRateIp = int(modelAdoptionRate["modelIp"]["count"] / numOfFunctions * 100)
    ipMinMAPE, ipMaxMAPE = _mapeRange(modelAdoptionRate["modelIp"])
    # modelLin: taken as the remainder so that the three truncated
    # percentages always add up to 100.
    adoptionRateLin = 100 - adoptionRateIp - adoptionRateLog
    linMinMAPE, linMaxMAPE = _mapeRange(modelAdoptionRate["modelLin"])
    # Store the values as columns for the LaTeX tables
    columnBenchmarkName.append(f"{benchmarkName.upper()}({numOfFunctions})")
    columnAdoptionRateLog.append(f"{adoptionRateLog}({logMinMAPE},{logMaxMAPE})")
    columnAdoptionRateIp.append(f"{adoptionRateIp}({ipMinMAPE},{ipMaxMAPE})")
    columnAdoptionRateLin.append(f"{adoptionRateLin}({linMinMAPE},{linMaxMAPE})")
    # Relative error rate, truncated to two decimal places
    averageRelativeError = dictToMakeSummary[benchmarkName]["averageRelativeError"]
    columnAverageRelativeError.append(int(averageRelativeError * 100) / 100)
dictToMakeSummary
columnBenchmarkName
columnAdoptionRateLog
columnAdoptionRateIp
columnAdoptionRateLin
columnAverageRelativeError
# Adoption-rate table
採用割合 = pd.DataFrame(
    {
        "ベンチマーク名": columnBenchmarkName,
        "反比例モデル": columnAdoptionRateIp,
        "対数モデル": columnAdoptionRateLog,
        "線形モデル": columnAdoptionRateLin,
    }
)
# Relative-error-rate table
相対誤差率 = pd.DataFrame(
    {"ベンチマーク名": columnBenchmarkName, "相対誤差率[%]": columnAverageRelativeError}
)
print(採用割合.to_latex(index=False))
print(相対誤差率.to_latex(index=False))
{'ft': {'modelAdoptionRate': {'modelLin': {'count': 8,
'min': array([0.]),
'max': array([1484.53872393])},
'modelIp': {'count': 5,
'min': array([5.69381599]),
'max': array([6.85185185])},
'modelLog': {'count': 0, 'min': inf, 'max': 0.0}},
'averageRelativeError': 13.139000000000001,
'関数の個数': 13},
'ep': {'modelAdoptionRate': {'modelLin': {'count': 1,
'min': array([0.]),
'max': 0.0},
'modelIp': {'count': 0, 'min': inf, 'max': 0.0},
'modelLog': {'count': 0, 'min': inf, 'max': 0.0}},
'averageRelativeError': 0.0,
'関数の個数': 1},
'is': {'modelAdoptionRate': {'modelLin': {'count': 7,
'min': array([0.]),
'max': 0.0},
'modelIp': {'count': 1,
'min': array([1237.54378251]),
'max': array([1237.54378251])},
'modelLog': {'count': 0, 'min': inf, 'max': 0.0}},
'averageRelativeError': 2.5005,
'関数の個数': 8},
'cg': {'modelAdoptionRate': {'modelLin': {'count': 7,
'min': array([0.]),
'max': array([76.36553853])},
'modelIp': {'count': 1,
'min': array([18.97934597]),
'max': array([18.97934597])},
'modelLog': {'count': 2,
'min': array([37.28698245]),
'max': array([37.28698245])}},
'averageRelativeError': 10.8345,
'関数の個数': 10}}
['FT(13)', 'EP(1)', 'IS(8)', 'CG(10)']
['0(-,-)', '0(-,-)', '0(-,-)', '20(37.2,37.2)']
['38(5.6,6.8)', '0(-,-)', '12(1237.5,1237.5)', '10(18.9,18.9)']
['62(0.0,1484.5)', '100(0.0,0.0)', '88(0.0,0.0)', '70(0.0,76.3)']
[13.13, 0.0, 2.5, 10.83]
\begin{tabular}{llll}
\toprule
ベンチマーク名 & 反比例モデル & 対数モデル & 線形モデル \\
\midrule
FT(13) & 38(5.6,6.8) & 0(-,-) & 62(0.0,1484.5) \\
EP(1) & 0(-,-) & 0(-,-) & 100(0.0,0.0) \\
IS(8) & 12(1237.5,1237.5) & 0(-,-) & 88(0.0,0.0) \\
CG(10) & 10(18.9,18.9) & 20(37.2,37.2) & 70(0.0,76.3) \\
\bottomrule
\end{tabular}
\begin{tabular}{lr}
\toprule
ベンチマーク名 & 相対誤差率[\%] \\
\midrule
FT(13) & 13.13 \\
EP(1) & 0.00 \\
IS(8) & 2.50 \\
CG(10) & 10.83 \\
\bottomrule
\end{tabular}
/tmp/ipykernel_137/832305136.py:27: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. print(採用割合.to_latex(index=False)) /tmp/ipykernel_137/832305136.py:29: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. print(相対誤差率.to_latex(index=False))
class ModelBranchForMultipleRegression(ModelBaseForMultipleRegression):
    """Linear model (multiple regression); branching is not implemented yet."""

    def transformDataForModel(self, inputDF):
        """Return ``inputDF`` as it is; this model applies no transformation."""
        return inputDF

    def setUpDataBeforeCalcLr(self):
        """Fill the model-building and test data attributes from the raw variables."""
        # Placeholder flag deciding whether branching is actually performed
        # (False: do not branch, True: branch).
        # TODO: search for the branch point and branch according to this flag.
        enableBranch = False
        transform = self.transformDataForModel
        # Data used to build the model
        self.dataXForPredict = transform(self.rawExplanaoryVariable)
        self.dataTForPredict = transform(self.rawResponseVariable)
        # Data used for testing
        self.dataXForTest = transform(self.rawExplanaoryVariableForTest)
        self.dataTForTest = transform(self.rawResponseVariableForTest)

    def calcLr(self):
        """Create a LinearRegression and fit it to the model-building data."""
        self.lr = LinearRegression()
        self.lr.fit(self.dataXForPredict, self.dataTForPredict)

    def predict(self, inputDF):
        """Predict with the fitted model from the explanatory columns of ``inputDF``."""
        explanatoryColumns = inputDF[self.explanatoryVariableColumnNames]
        return self.lr.predict(explanatoryColumns)
def test_ModelBranchForMultipleRegression():
    """Exercise ModelBranchForMultipleRegression on two synthetic data sets.

    Each case builds the model, predicts the last row of the data it was
    built from, and asserts that the value returned by
    returnRelativeErrorRate() is below 1.
    """
    # ---- Case 1: plain linear data ----
    # Explanatory variables
    plotX = np.linspace(0, 20, 10)
    plotY = np.linspace(20, 40, 10)
    plotZ = np.linspace(40, 60, 10)
    # Response variable
    plotT = plotX + 2 * plotY + 3 * plotZ + 4
    # Build the DF: one column per variable
    columnNames = ["plotX", "plotY", "plotZ", "plotT"]
    linearDF = pd.DataFrame(
        index=columnNames, data=[plotX, plotY, plotZ, plotT]
    ).T
    linearDF["functionName"] = "functionName"
    # All columns but the last are explanatory; the last one is the response.
    linearModel = ModelBranchForMultipleRegression(
        inputDF=linearDF,
        explanatoryVariableColumnNames=columnNames[:-1],
        responseVariableColumnNames=columnNames[-1:],
        conditionDictForTest={},
    )
    # Prepare the data, then build the model
    linearModel.setUpDataBeforeCalcLr()
    linearModel.calcLr()
    # Predict the last row and compare it with the real value
    predictedNum = linearModel.predict(pd.DataFrame(linearDF.tail(1)))
    linearErrorRate = returnRelativeErrorRate(
        realNum=plotT[-1], predictedNum=predictedNum
    )
    assert linearErrorRate < 1

    # ---- Case 2: linear-saturation (branch) data ----
    branchIndex = 5
    # Explanatory variables
    branchX = list(range(1, 11))
    branchY = list(range(11, 21))
    branchZ = list(range(21, 31))
    # Response variable: sum of the three per-variable branch series
    branchT_X = returnListForBranchModel(
        inputList=branchX, branchIndex=branchIndex, a=1, b=2
    )
    branchT_Y = returnListForBranchModel(
        inputList=branchY, branchIndex=branchIndex, a=3, b=4
    )
    branchT_Z = returnListForBranchModel(
        inputList=branchZ, branchIndex=branchIndex, a=5, b=6
    )
    branchT = [
        numX + numY + numZ
        for numX, numY, numZ in zip(branchT_X, branchT_Y, branchT_Z)
    ]
    # Build the DF
    branchDF = pd.DataFrame(
        {"branchX": branchX, "branchY": branchY, "branchZ": branchZ, "branchT": branchT}
    )
    # Function name column
    branchDF["functionName"] = "functionName"
    # Model object used for the prediction
    branchModel = ModelBranchForMultipleRegression(
        inputDF=branchDF,
        explanatoryVariableColumnNames=["branchX", "branchY", "branchZ"],
        responseVariableColumnNames=["branchT"],
        conditionDictForTest={},
    )
    branchModel.setUpDataBeforeCalcLr()
    branchModel.calcLr()
    # Predict the last row and compare it with the real value
    predictedNum = branchModel.predict(pd.DataFrame(branchDF.tail(1)))
    relativeErrorRate = returnRelativeErrorRate(
        realNum=branchT[-1], predictedNum=predictedNum
    )
    print(f"relatieErrorRate={relativeErrorRate}")
    assert relativeErrorRate < 1
# test_ModelBranchForMultipleRegression()
# Summarize the data of every benchmark except BT and SP and print it as a
# LaTeX table.
benchmarkNamesExcludeBTSP = ["cg", "ep", "ft", "is", "lu", "mg"]
# classes = ["A", "B", "C", "D"]
classes = ["B"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
targetIndex = -1
csvDirPath = "./csv_files/"
modelNames = ["ModelLin", "ModelIp", "ModelLog", "ModelBranch"]
dfByDatumExcludeBTSP = returnDFSummarizedData(
    benchmarkNames=benchmarkNamesExcludeBTSP,
    classes=classes,
    processes=processes,
    targetIndex=targetIndex,
    csvDirPath=csvDirPath,
    modelNames=modelNames,
)
# dfByDatumExcludeBTSP
# Split the summary per benchmark and count the rows along the way.
dictForLatexTable = {}
numOfData = 0
for benchmarkName in benchmarkNamesExcludeBTSP:
    rowsOfBenchmark = dfByDatumExcludeBTSP[
        dfByDatumExcludeBTSP["benchmarkName"] == benchmarkName
    ]
    dictForLatexTable[benchmarkName] = rowsOfBenchmark
    numOfData += len(rowsOfBenchmark)
numOfData
# One Series per benchmark becomes one row of the table.
listForDF = []
for benchmarkName in benchmarkNamesExcludeBTSP:
    seriesOfBenchmark = returnSeriesOfDatumPerBenchmark(
        inputDF=dictForLatexTable[benchmarkName]
    )
    listForDF.append(seriesOfBenchmark)
DF = pd.DataFrame(listForDF)
print(DF.to_latex(index=False))
156
\begin{tabular}{lllll}
\toprule
benchmarkName & ModelIp & ModelLog & ModelBranch & ModelLin \\
\midrule
CG(22) & 0(-) & 13(11.7,11.7) & 18(0.0,0.0) & 69(0.0,0.0) \\
EP(9) & 0(-) & 0(-) & 0(-) & 100(0.0,0.0) \\
FT(32) & 9(0.0,0.0) & 0(-) & 0(-) & 91(0.0,0.0) \\
IS(21) & 19(0.0,0.0) & 0(-) & 0(-) & 81(0.0,0.0) \\
LU(42) & 30(0.0,25.9) & 0(-) & 0(-) & 70(0.0,0.0) \\
MG(30) & 0(-) & 3(0.5,0.5) & 0(-) & 97(0.0,3.4) \\
\bottomrule
\end{tabular}
/tmp/ipykernel_137/3514052145.py:39: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. print(DF.to_latex(index=False))
# Synthetic data: two branch-model series built over the same base list.
plotBase = [*range(20)]
plotX = np.array(
    returnListForBranchModel(inputList=plotBase, branchIndex=13, a=2, b=3)
)
plotY = np.array(
    returnListForBranchModel(inputList=plotBase, branchIndex=16, a=4, b=5)
)
plotZ = plotX + plotY
# TODO: the branch line is where the function call count is the same while the
# two variables differ, and where many such points exist
print(f"plotBase={plotBase}\nplotX={plotX}\nplotY={plotY}")
plotBase=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] plotX=[ 3 5 7 9 11 13 15 17 19 21 23 25 27 29 29 29 29 29 29 29] plotY=[ 5 9 13 17 21 25 29 33 37 41 45 49 53 57 61 65 69 69 69 69]
def returnStraightLineDataBetween2Points(p1=(), p2=()):
    """Return points on the straight segment that connects ``p1`` and ``p2``.

    Args:
        p1: Start point as a sequence ``[x, y, z]``.
        p2: End point as a sequence ``[x, y, z]``.

    Returns:
        A dict ``{"x": [...], "y": [...], "z": [...]}`` holding 101 evenly
        spaced points (parameter t = 0.00, 0.01, ..., 1.00) running from
        ``p1`` to ``p2``.  If either argument has fewer than three elements
        a warning is issued and ``-1`` is returned.
    """
    # Local import so the function does not depend on a file-level
    # ``import warnings``.
    import warnings

    # Guard against every too-short input, not only the empty one, so that
    # one- or two-element arguments get the warning instead of an IndexError.
    if len(p1) < 3 or len(p2) < 3:
        warnings.warn("引数が条件(要素数3のリストx2)を満たしていません")
        return -1
    # linspace fixes the number of samples and makes the last one exactly 1.0;
    # arange with a float step does not guarantee either.
    t = np.linspace(0.0, 1.0, 101)
    returnDict = {}
    for axisName, axisIndex in (("x", 0), ("y", 1), ("z", 2)):
        # Linear interpolation (1 - t) * a + t * b, evaluated for all t at once.
        returnDict[axisName] = ((1 - t) * p1[axisIndex] + t * p2[axisIndex]).tolist()
    return returnDict
def returnCoefAndIntercept(inputDict):
    """Fit y = coef * x + intercept to line data and return both values.

    Args:
        inputDict: Return value of returnStraightLineDataBetween2Points();
            only its "x" and "y" entries are used.

    Returns:
        A dict with the keys "coef_" (first row of the fitted ``coef_``) and
        "intercept_" (the fitted ``intercept_``).
    """
    explanatoryDF = pd.DataFrame({"x": inputDict["x"]})
    responseDF = pd.DataFrame({"y": inputDict["y"]})
    model = LinearRegression().fit(explanatoryDF, responseDF)
    return {"coef_": model.coef_[0], "intercept_": model.intercept_}
# Create the data to plot: dataset01 varies along X (Y fixed to 0) and
# dataset02 varies along Y (X fixed to 0).
plotZero = [0] * len(plotBase)
dataset01 = pd.DataFrame({"X": plotBase, "Y": plotZero, "Z": plotX})
dataset02 = pd.DataFrame({"X": plotZero, "Y": plotBase, "Z": plotY})
xDataSet01 = dataset01["X"].tolist()
zDataSet01 = dataset01["Z"].tolist()
yDataSet02 = dataset02["Y"].tolist()
zDataSet02 = dataset02["Z"].tolist()
# Branch index of each dataset
branchIndexInDataSet01 = returnBranchIndexOfList(
    inputListX=xDataSet01, inputListY=zDataSet01
)
branchIndexInDataSet02 = returnBranchIndexOfList(
    inputListX=yDataSet02, inputListY=zDataSet02
)
# Coordinates of the two branch points
p1x, p1y, p1z = (
    dataset01[column][branchIndexInDataSet01] for column in ("X", "Y", "Z")
)
p2x, p2y, p2z = (
    dataset02[column][branchIndexInDataSet02] for column in ("X", "Y", "Z")
)
# Line through both branch points, and its coefficient / intercept
splitLine = returnStraightLineDataBetween2Points(
    p1=[p1x, p1y, p1z], p2=[p2x, p2y, p2z]
)
splitLineDict = returnCoefAndIntercept(splitLine)
splitLineDict
{'coef_': array([-1.23076923]), 'intercept_': array([16.])}
# TODO: model construction and the processing related to it
# 1. Split each dataset into the parts before and after its branch point
# Dataset 1
data01BeforeBranchPoint = {
    column: dataset01[column][:branchIndexInDataSet01] for column in ("X", "Y", "Z")
}
data01AfterBranchPoint = {
    column: dataset01[column][branchIndexInDataSet01:] for column in ("X", "Y", "Z")
}
DF01BeforeBranchPoint = pd.DataFrame(data01BeforeBranchPoint)
DF01AfterBranchPoint = pd.DataFrame(data01AfterBranchPoint)
# Dataset 2
data02BeforeBranchPoint = {
    column: dataset02[column][:branchIndexInDataSet02] for column in ("X", "Y", "Z")
}
data02AfterBranchPoint = {
    column: dataset02[column][branchIndexInDataSet02:] for column in ("X", "Y", "Z")
}
DF02BeforeBranchPoint = pd.DataFrame(data02BeforeBranchPoint)
DF02AfterBranchPoint = pd.DataFrame(data02AfterBranchPoint)
# 2. Build a separate model for each side of the branch point
# 2-1. Stack the "before" parts and the "after" parts vertically to obtain
#      the training data of each model
dataBefore = pd.concat([DF01BeforeBranchPoint, DF02BeforeBranchPoint], axis=0)
dataAfter = pd.concat([DF01AfterBranchPoint, DF02AfterBranchPoint], axis=0)
# 2-2. Separate explanatory (X, Y) and response (Z) variables
DFBeforeExpVar, DFBeforeResVar = dataBefore[["X", "Y"]], dataBefore[["Z"]]
DFAfterExpVar, DFAfterResVar = dataAfter[["X", "Y"]], dataAfter[["Z"]]
# 2-3. Fit one model per part
lr1 = LinearRegression()
lr2 = LinearRegression()
lr1.fit(DFBeforeExpVar, DFBeforeResVar)
lr2.fit(DFAfterExpVar, DFAfterResVar)
# 3. Predict every point of a regular (x, y) grid for the 3D plot.
#    Both axes run from `start` up to (excluding) `stop` in steps of `step`.
start = 0.4
stop = 19
step = 1.0
gridValues = np.arange(start, stop, step)
ExpectPointX = [gridX for gridX in gridValues for gridY in gridValues]
ExpectPointY = [gridY for gridX in gridValues for gridY in gridValues]
# 3-1. Split the grid with the coef_ / intercept_ of the branch line
coef_ = splitLineDict["coef_"]
intercept_ = splitLineDict["intercept_"]
expectPointXBefore = []
expectPointXAfter = []
expectPointYBefore = []
expectPointYAfter = []
for x, y in zip(ExpectPointX, ExpectPointY):
    # Points on or below the branch line belong to the "before" side.
    if y <= coef_ * x + intercept_:
        bucketX, bucketY = expectPointXBefore, expectPointYBefore
    else:
        bucketX, bucketY = expectPointXAfter, expectPointYAfter
    bucketX.append(x)
    bucketY.append(y)
# 3-2. Predict: "before" points with lr1 first, then "after" points with lr2
datumForPlot = {"x": [], "y": [], "z": []}
for sideModel, sidePointsX, sidePointsY in (
    (lr1, expectPointXBefore, expectPointYBefore),
    (lr2, expectPointXAfter, expectPointYAfter),
):
    for x, y in zip(sidePointsX, sidePointsY):
        DFForPredict = pd.DataFrame({"X": [x], "Y": [y]})
        result = sideModel.predict(DFForPredict)
        datumForPlot["x"].append(x)
        datumForPlot["y"].append(y)
        datumForPlot["z"].append(result)
LinearRegression()
LinearRegression()
# 3D plot of the two synthetic datasets, the branch line, and the points
# predicted by the two models.
fig = plt.figure()
# Create the 3D axes through the figure: constructing Axes3D(fig) directly is
# deprecated since matplotlib 3.4 and no longer adds the axes to the figure
# from 3.6 on.
ax = fig.add_subplot(111, projection="3d")
ax.set_xlabel("X")
ax.set_ylabel("Y軸")
ax.set_zlabel("Z軸")
ax.plot(plotBase, plotZero, plotX, color="red", label="自作データ1")
ax.plot(plotZero, plotBase, plotY, color="green", label="自作データ2")
ax.plot(splitLine["x"], splitLine["y"], splitLine["z"], color="blue", label="分岐線")
# 3-3. Plot the predicted points as the model surface
ax.scatter(
    datumForPlot["x"],
    datumForPlot["y"],
    datumForPlot["z"],
    color="cyan",
    label="モデルの表面",
)
ax.legend()
/tmp/ipykernel_137/4177612833.py:2: MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is deprecated since 3.4. Pass the keyword argument auto_add_to_figure=False and use fig.add_axes(ax) to suppress this warning. The default value of auto_add_to_figure will change to False in mpl3.5 and True values will no longer work in 3.6. This is consistent with other Axes classes. ax = Axes3D(fig)
Text(0.5, 0, 'X')
Text(0.5, 0.5, 'Y軸')
Text(0.5, 0, 'Z軸')
[<mpl_toolkits.mplot3d.art3d.Line3D at 0x7fd86b793f10>]
[<mpl_toolkits.mplot3d.art3d.Line3D at 0x7fd86b63c190>]
[<mpl_toolkits.mplot3d.art3d.Line3D at 0x7fd86b63cc10>]
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd86929dfc0>
<matplotlib.legend.Legend at 0x7fd86929d4b0>
class ModelBranchForMultipleRegression(ModelBaseForMultipleRegression):
    """Linear-saturation model (multiple regression)."""

    def transformDataForModel(self, inputDF):
        """Return ``inputDF`` as it is; this model applies no transformation."""
        return inputDF

    def setUpDataBeforeCalcLr(self):
        """Fill the model-building and test data attributes from the raw variables."""
        transform = self.transformDataForModel
        # Data used to build the model
        self.dataXForPredict = transform(self.rawExplanaoryVariable)
        self.dataTForPredict = transform(self.rawResponseVariable)
        # Data used for testing
        self.dataXForTest = transform(self.rawExplanaoryVariableForTest)
        self.dataTForTest = transform(self.rawResponseVariableForTest)

    def calcLr(self):
        """Create a LinearRegression and fit it to the model-building data."""
        self.lr = LinearRegression()
        self.lr.fit(self.dataXForPredict, self.dataTForPredict)

    def predict(self, inputDF):
        """Predict with the fitted model from the explanatory columns of ``inputDF``."""
        explanatoryColumns = inputDF[self.explanatoryVariableColumnNames]
        return self.lr.predict(explanatoryColumns)
dataset01, dataset02branchIndexInDataSet01, branchIndexInDataSet01splitLinebranchIndexInDataSet01
13
branchIndexInDataSet02
16
# Take the row of dataset01 at position branchIndexInDataSet01 and collect its
# coordinates as [X, Y, Z].
pointA = dataset01.iloc[branchIndexInDataSet01]
p1 = [pointA[axis] for axis in ("X", "Y", "Z")]
print(pointA)
print(p1)
X 13 Y 0 Z 29 Name: 13, dtype: int64 [13, 0, 29]
# Take the row of dataset02 at position branchIndexInDataSet02 and collect its
# coordinates as [X, Y, Z].
pointB = dataset02.iloc[branchIndexInDataSet02]
p2 = [pointB[axis] for axis in ("X", "Y", "Z")]
print(pointB)
print(p2)
X 0 Y 16 Z 69 Name: 16, dtype: int64 [0, 16, 69]
# Visual check of returnStraightLineDataBetween2Points: draw the line it
# returns for two sample points together with the points themselves.
p1 = [5, 12, 13]
p2 = [7, 24, 25]
lineDataDict = returnStraightLineDataBetween2Points(p1=p1, p2=p2)
fig = plt.figure()
ax = plt.axes(projection="3d")
x = lineDataDict["x"]
y = lineDataDict["y"]
z = lineDataDict["z"]
ax.plot3D(x, y, z, "green")
# A line made of a single point and no marker renders nothing, so the two end
# points are drawn as scatter markers to make them visible.
ax.scatter3D(p1[0], p1[1], p1[2])
ax.scatter3D(p2[0], p2[1], p2[2])
plt.show()
[<mpl_toolkits.mplot3d.art3d.Line3D at 0x7fd868eb11b0>]
[<mpl_toolkits.mplot3d.art3d.Line3D at 0x7fd868eb1570>]
[<mpl_toolkits.mplot3d.art3d.Line3D at 0x7fd868eb1870>]
# Collect the existing call-count data for benchmark CG and, for a selected set
# of functions, draw a 3D scatter of (cores, problem size, call count).
benchmarkNames = ["cg"]
classes = ["A", "B", "C", "D"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
csvDirPath = "./csv_files/"
modelNames = ["ModelBranch", "ModelIp", "ModelLog", "ModelLin"]
rawDataDF = returnCollectedExistingData(
    benchmarkNames=benchmarkNames,
    classes=classes,
    processes=processes,
    # Use the variable defined above instead of repeating the literal path.
    csvDirPath=csvDirPath,
)
functionNames = sorted(set(rawDataDF["functionName"].tolist()))
functionNamesToVis = [
    ".TAU_application",
    "CONJ_GRAD",
    "ICNVRT",
    "MPI_Irecv()",
    "SPRNVC",
]
rawDataDF
for functionName in functionNames:
    # Extract the rows of this function. The explicit copy makes the column
    # assignment below act on an independent frame instead of a slice of
    # rawDataDF, which is what triggered pandas' SettingWithCopyWarning.
    functionCallCount = rawDataDF[rawDataDF["functionName"] == functionName].copy()
    # Convert the problem-size letters into numbers
    benchmarkClasses = functionCallCount["benchmarkClass"].tolist()
    functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB(
        inputList=benchmarkClasses
    )
    if functionName in functionNamesToVis:
        # Prepare the plot. The axes are created through the figure because
        # Axes3D(fig) adding itself to the figure is deprecated since
        # matplotlib 3.4.
        fig = plt.figure()
        ax = fig.add_subplot(projection="3d")
        # Set the axis labels
        ax.set_xlabel("コア数")
        ax.set_ylabel("問題サイズ")
        ax.set_zlabel("関数コール回数")
        # One scatter series per problem size
        for benchmarkClass in sorted(set(benchmarkClasses)):
            condition = functionCallCount["benchmarkClass"] == benchmarkClass
            functionCallPerBenchmarkClass = functionCallCount[condition]
            # Data plotted on each axis
            問題サイズ = functionCallPerBenchmarkClass["benchmarkClassInNum"].tolist()
            コア数 = functionCallPerBenchmarkClass["process"].tolist()
            関数コール回数 = functionCallPerBenchmarkClass["functionCallNum"].tolist()
            # Plot
            ax.scatter(
                コア数, 問題サイズ, 関数コール回数, label=f"{functionName}(問題サイズ{benchmarkClass})"
            )
        ax.legend()
| functionName | functionCallNum | benchmarkName | benchmarkClass | process | |
|---|---|---|---|---|---|
| 0 | .TAU_application | 1.0 | cg | A | 2 |
| 1 | CG | 1.0 | cg | A | 2 |
| 2 | MPI_Finalize() | 1.0 | cg | A | 2 |
| 3 | INITIALIZE_MPI | 1.0 | cg | A | 2 |
| 4 | MPI_Init() | 1.0 | cg | A | 2 |
| ... | ... | ... | ... | ... | ... |
| 17 | MPI_Reduce() | 1.0 | cg | D | 256 |
| 18 | SETUP_SUBMATRIX_INFO | 1.0 | cg | D | 256 |
| 19 | MPI_Comm_size() | 1.0 | cg | D | 256 |
| 20 | MPI_Comm_rank() | 1.0 | cg | D | 256 |
| 21 | SETUP_PROC_INFO | 1.0 | cg | D | 256 |
704 rows × 5 columns
/tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:36: MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is deprecated since 3.4. Pass the keyword argument auto_add_to_figure=False and use fig.add_axes(ax) to suppress this warning. The default value of auto_add_to_figure will change to False in mpl3.5 and True values will no longer work in 3.6. This is consistent with other Axes classes. ax = Axes3D(fig)
Text(0.5, 0, 'コア数')
Text(0.5, 0.5, '問題サイズ')
Text(0.5, 0, '関数コール回数')
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868f13b50>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868f133d0>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868f13fd0>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868f388e0>
<matplotlib.legend.Legend at 0x7fd868f13310>
/tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:36: MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is deprecated since 3.4. Pass the keyword argument auto_add_to_figure=False and use fig.add_axes(ax) to suppress this warning. The default value of auto_add_to_figure will change to False in mpl3.5 and True values will no longer work in 3.6. This is consistent with other Axes classes. ax = Axes3D(fig)
Text(0.5, 0, 'コア数')
Text(0.5, 0.5, '問題サイズ')
Text(0.5, 0, '関数コール回数')
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868ee7490>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868f388b0>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868d6e200>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868d6e7a0>
<matplotlib.legend.Legend at 0x7fd868ee7430>
/tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:36: MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is deprecated since 3.4. Pass the keyword argument auto_add_to_figure=False and use fig.add_axes(ax) to suppress this warning. The default value of auto_add_to_figure will change to False in mpl3.5 and True values will no longer work in 3.6. This is consistent with other Axes classes. ax = Axes3D(fig)
Text(0.5, 0, 'コア数')
Text(0.5, 0.5, '問題サイズ')
Text(0.5, 0, '関数コール回数')
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868f3ab60>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868d6e6b0>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868dd41f0>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868dd46a0>
<matplotlib.legend.Legend at 0x7fd868d56260>
/tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:36: MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is deprecated since 3.4. Pass the keyword argument auto_add_to_figure=False and use fig.add_axes(ax) to suppress this warning. The default value of auto_add_to_figure will change to False in mpl3.5 and True values will no longer work in 3.6. This is consistent with other Axes classes. ax = Axes3D(fig)
Text(0.5, 0, 'コア数')
Text(0.5, 0.5, '問題サイズ')
Text(0.5, 0, '関数コール回数')
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868d9c760>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd86918c1c0>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd86918ca60>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd86918ed70>
<matplotlib.legend.Legend at 0x7fd86918d270>
/tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB( /tmp/ipykernel_137/4053875472.py:36: MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is deprecated since 3.4. Pass the keyword argument auto_add_to_figure=False and use fig.add_axes(ax) to suppress this warning. The default value of auto_add_to_figure will change to False in mpl3.5 and True values will no longer work in 3.6. This is consistent with other Axes classes. ax = Axes3D(fig)
Text(0.5, 0, 'コア数')
Text(0.5, 0.5, '問題サイズ')
Text(0.5, 0, '関数コール回数')
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd868f38910>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd86918e6e0>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd8682fa560>
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fd8682faa40>
<matplotlib.legend.Legend at 0x7fd8682f8610>
/tmp/ipykernel_137/4053875472.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy functionCallCount["benchmarkClassInNum"] = convertBenchmarkClasses_problemSizeInNPB(
# Pull the three plotted quantities out of functionCallCount as plain lists:
# problem size, core count and function call count.
問題サイズ, コア数, 関数コール回数 = (
    functionCallCount[column].tolist()
    for column in ("benchmarkClassInNum", "process", "functionCallNum")
)
# Interactive 3D scatter plot of the three quantities
temporaryDF = pd.DataFrame(
    {
        "問題サイズ": 問題サイズ,
        "コア数": コア数,
        "関数コール回数": 関数コール回数,
    }
)
fig = px.scatter_3d(
    temporaryDF,
    x="問題サイズ",
    y="コア数",
    z="関数コール回数",
    width=900,
    height=900,
)
fig.show()
# The same data rendered as a 3D mesh
x, y, z = 問題サイズ, コア数, 関数コール回数
fig = go.Figure(data=[go.Mesh3d(x=x, y=y, z=z)])
fig.update_layout(width=900, height=900)
fig.show()
{ベンチマーク名(str):MAPE表(DF)}
inputDict = {"benchmarkName": inputDF}
->
ベンチマーク名, MAPE平均
# Clear the interactive namespace without asking for confirmation
%reset -f
# Import the library that is kept in ipynb format
%run ./lib/lib.ipynb
# Name of the directory that holds the CSV files containing the raw data
csvDirPath = "./csv_files/"
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
# Build, for every benchmark, the MAPE tables of the models constructed with
# and without the initial variables, first with the core count fixed at 128
# and then over all core counts, and print the per-benchmark averages as
# LaTeX tables. The averaged tables are also collected in resultsList.
resultsList = []
# Fix the core count at 128 and sweep the problem size
classes = ["A", "B", "C", "D", "E", "F"]
processes = [128]
dictColumnNamesJP = {
    "functionName": "関数名",
    "modelLin": "線形モデル",
    "modelIp": "反比例モデル",
    "modelLog": "対数モデル",
}
date = "2021年11月17日"
resultDict128WithoutInit = {}
resultDict128WithInit = {}
for benchmarkName in benchmarkNames:
    print(f"% _____benchmarkName={benchmarkName}_____")
    # Models built without the initial variables
    expVarBase = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=True,
        initExpVar=False,
    )
    dfBase = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarBase,
        csvDirPath=csvDirPath,
    )
    dfBase = dfBase.rename(columns=dictColumnNamesJP)
    dfBase = dfBase.set_index("関数名")
    resultDict128WithoutInit[benchmarkName.upper()] = addLowestMAPEColumn(dfBase)
    # Models built with the initial variables
    expVarInit = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=False,
        initExpVar=True,
    )
    dfInit = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarInit,
        csvDirPath=csvDirPath,
    )
    dfInit = dfInit.rename(columns=dictColumnNamesJP)
    dfInit = dfInit.set_index("関数名")
    resultDict128WithInit[benchmarkName.upper()] = addLowestMAPEColumn(dfInit)
csvDirPath = "./csv_files/"
test_returnExplanatoryVariablesList(csvDirPath=csvDirPath)
print(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDict128WithInit).to_latex(
        caption="初期変数を含めて構築したモデルの平均MAPE", label="128WithInit"
    )
)
# NOTE: ``.copy`` without parentheses stored the bound method object instead
# of a DataFrame; the call is required to keep an actual copy of the table.
resultsList.append(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDict128WithInit).copy()
)
print(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDict128WithoutInit).to_latex(
        caption="初期変数を含めずに構築したモデルの平均MAPE", label="128WithoutInit"
    )
)
resultsList.append(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDict128WithoutInit).copy()
)
# Sweep both the core count and the problem size
# (dictColumnNamesJP is unchanged from the definition above)
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
resultDictAllWithInit = {}
resultDictAllWithoutInit = {}
for benchmarkName in benchmarkNames:
    print(f"% _____benchmarkName={benchmarkName}_____")
    # Models built without the initial variables
    expVarBase = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=True,
        initExpVar=False,
    )
    dfBase = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarBase,
        csvDirPath=csvDirPath,
    )
    dfBase = dfBase.rename(columns=dictColumnNamesJP)
    dfBase = dfBase.set_index("関数名")
    resultDictAllWithoutInit[benchmarkName.upper()] = addLowestMAPEColumn(dfBase)
    # Models built with the initial variables
    expVarInit = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=False,
        initExpVar=True,
    )
    dfInit = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarInit,
        csvDirPath=csvDirPath,
    )
    dfInit = dfInit.rename(columns=dictColumnNamesJP)
    dfInit = dfInit.set_index("関数名")
    resultDictAllWithInit[benchmarkName.upper()] = addLowestMAPEColumn(dfInit)
csvDirPath = "./csv_files/"
test_returnExplanatoryVariablesList(csvDirPath=csvDirPath)
print(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithInit).to_latex(
        caption="初期変数を含めて構築したモデルの平均MAPE", label="AllWithInit"
    )
)
resultsList.append(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithInit).copy()
)
print(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithoutInit).to_latex(
        caption="初期変数を含めずに構築したモデルの平均MAPE", label="AllWithoutInit"
    )
)
resultsList.append(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithoutInit).copy()
)
% _____benchmarkName=cg_____ % _____benchmarkName=ep_____ % _____benchmarkName=ft_____ % _____benchmarkName=is_____ % _____benchmarkName=lu_____ % _____benchmarkName=mg_____
0
/tmp/ipykernel_137/1867081406.py:82: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. /tmp/ipykernel_137/1867081406.py:90: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
\begin{table}
\centering
\caption{初期変数を含めて構築したモデルの平均MAPE}
\label{128WithInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 0.0 \\
EP & 0.0 \\
FT & 0.0 \\
IS & 0.0 \\
LU & 1.6 \\
MG & 0.0 \\
\bottomrule
\end{tabular}
\end{table}
\begin{table}
\centering
\caption{初期変数を含めずに構築したモデルの平均MAPE}
\label{128WithoutInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 71.0 \\
EP & 0.0 \\
FT & 50.2 \\
IS & 0.0 \\
LU & 30.1 \\
MG & 10.4 \\
\bottomrule
\end{tabular}
\end{table}
% _____benchmarkName=cg_____
% _____benchmarkName=ep_____
% _____benchmarkName=ft_____
% _____benchmarkName=is_____
% _____benchmarkName=lu_____
% _____benchmarkName=mg_____
0
\begin{table}
\centering
\caption{初期変数を含めて構築したモデルの平均MAPE}
\label{AllWithInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 33.9 \\
EP & 0.0 \\
FT & 47.5 \\
IS & 364.0 \\
LU & 1958.4 \\
MG & 4.8 \\
\bottomrule
\end{tabular}
\end{table}
\begin{table}
\centering
\caption{初期変数を含めずに構築したモデルの平均MAPE}
\label{AllWithoutInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 418.4 \\
EP & 0.0 \\
FT & 243.4 \\
IS & 424.4 \\
LU & 2463.0 \\
MG & 12.3 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/1867081406.py:178: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. /tmp/ipykernel_137/1867081406.py:186: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
# Settings for the run that adds the combined linear/inverse-proportional model.
resultsList = list()
# Fix the core count at 128 and sweep the problem size
classes = list("ABCDEF")
processes = [128]
# Maps the column names of the MAPE table to their Japanese display names
dictColumnNamesJP = dict(
    functionName="関数名",
    modelLin="線形モデル",
    modelIp="反比例モデル",
    modelLog="対数モデル",
    modelLinAndIp="線形および反比例モデル",
)
# Every key except the function-name column is a model name
model_names_list = [key for key in dictColumnNamesJP if key != "functionName"]
print(f"model_names_list={model_names_list}")
date = "2021年12月25日"
model_names_list=['modelLin', 'modelIp', 'modelLog', 'modelLinAndIp']
# Sweep both the core count and the problem size, build the MAPE tables of the
# models constructed with and without the initial variables for every
# benchmark, and print the per-benchmark averages as LaTeX tables.
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
resultDictAllWithInit = {}
resultDictAllWithoutInit = {}
for benchmarkName in benchmarkNames:
    print(f"% _____benchmarkName={benchmarkName}_____")
    # Models built without the initial variables
    expVarBase = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=True,
        initExpVar=False,
    )
    dfBase = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarBase,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    dfBase = dfBase.rename(columns=dictColumnNamesJP)
    dfBase = dfBase.set_index("関数名")
    resultDictAllWithoutInit[benchmarkName.upper()] = addLowestMAPEColumn(dfBase)
    # Models built with the initial variables
    expVarInit = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=False,
        initExpVar=True,
    )
    dfInit = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarInit,
        csvDirPath=csvDirPath,
        # Evaluate the same set of models as the table without initial
        # variables; this argument was previously passed only there, so the
        # two tables were not compared over the same models.
        modelNames=model_names_list,
    )
    dfInit = dfInit.rename(columns=dictColumnNamesJP)
    dfInit = dfInit.set_index("関数名")
    resultDictAllWithInit[benchmarkName.upper()] = addLowestMAPEColumn(dfInit)
csvDirPath = "./csv_files/"
test_returnExplanatoryVariablesList(csvDirPath=csvDirPath)
print(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithInit).to_latex(
        caption="初期変数を含めて構築したモデルの平均MAPE", label="AllWithInit"
    )
)
# NOTE: ``.copy`` without parentheses stored the bound method object instead
# of a DataFrame; the call is required to keep an actual copy of the table.
resultsList.append(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithInit).copy()
)
print(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithoutInit).to_latex(
        caption="初期変数を含めずに構築したモデルの平均MAPE", label="AllWithoutInit"
    )
)
resultsList.append(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithoutInit).copy()
)
% _____benchmarkName=cg_____ % _____benchmarkName=ep_____ % _____benchmarkName=ft_____ % _____benchmarkName=is_____ % _____benchmarkName=lu_____ % _____benchmarkName=mg_____
0
\begin{table}
\centering
\caption{初期変数を含めて構築したモデルの平均MAPE}
\label{AllWithInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 33.9 \\
EP & 0.0 \\
FT & 47.5 \\
IS & 364.0 \\
LU & 1958.4 \\
MG & 4.8 \\
\bottomrule
\end{tabular}
\end{table}
\begin{table}
\centering
\caption{初期変数を含めずに構築したモデルの平均MAPE}
\label{AllWithoutInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 418.4 \\
EP & 0.0 \\
FT & 213.9 \\
IS & 424.4 \\
LU & 2449.7 \\
MG & 12.3 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/164478607.py:63: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. /tmp/ipykernel_137/164478607.py:71: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
# Settings for the run that adds the combined inverse-proportional/linear model.
resultsList = list()
# Fix the core count at 128 and sweep the problem size
classes = list("ABCDEF")
processes = [128]
# Maps the column names of the MAPE table to their Japanese display names
dictColumnNamesJP = dict(
    functionName="関数名",
    modelLin="線形モデル",
    modelIp="反比例モデル",
    modelLog="対数モデル",
    modelIpAndLin="反比例および線形モデル",
)
# Every key except the function-name column is a model name
model_names_list = [key for key in dictColumnNamesJP if key != "functionName"]
print(f"model_names_list={model_names_list}")
date = "2022年1月3日"
model_names_list=['modelLin', 'modelIp', 'modelLog', 'modelIpAndLin']
# Sweep both the core count and the problem size, build the MAPE tables of the
# models constructed with and without the initial variables for every
# benchmark, and print the per-benchmark averages as LaTeX tables.
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
resultDictAllWithInit = {}
resultDictAllWithoutInit = {}
for benchmarkName in benchmarkNames:
    print(f"% _____benchmarkName={benchmarkName}_____")
    # Models built without the initial variables
    expVarBase = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=True,
        initExpVar=False,
    )
    dfBase = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarBase,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    dfBase = dfBase.rename(columns=dictColumnNamesJP)
    dfBase = dfBase.set_index("関数名")
    resultDictAllWithoutInit[benchmarkName.upper()] = addLowestMAPEColumn(dfBase)
    # Models built with the initial variables
    expVarInit = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=False,
        initExpVar=True,
    )
    dfInit = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarInit,
        csvDirPath=csvDirPath,
        # Evaluate the same set of models as the table without initial
        # variables; this argument was previously passed only there, so the
        # two tables were not compared over the same models.
        modelNames=model_names_list,
    )
    dfInit = dfInit.rename(columns=dictColumnNamesJP)
    dfInit = dfInit.set_index("関数名")
    resultDictAllWithInit[benchmarkName.upper()] = addLowestMAPEColumn(dfInit)
csvDirPath = "./csv_files/"
test_returnExplanatoryVariablesList(csvDirPath=csvDirPath)
print(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithInit).to_latex(
        caption="初期変数を含めて構築したモデルの平均MAPE", label="AllWithInit"
    )
)
# NOTE: ``.copy`` without parentheses stored the bound method object instead
# of a DataFrame; the call is required to keep an actual copy of the table.
resultsList.append(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithInit).copy()
)
print(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithoutInit).to_latex(
        caption="初期変数を含めずに構築したモデルの平均MAPE", label="AllWithoutInit"
    )
)
resultsList.append(
    returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithoutInit).copy()
)
% _____benchmarkName=cg_____ % _____benchmarkName=ep_____ % _____benchmarkName=ft_____ % _____benchmarkName=is_____ % _____benchmarkName=lu_____ % _____benchmarkName=mg_____
0
\begin{table}
\centering
\caption{初期変数を含めて構築したモデルの平均MAPE}
\label{AllWithInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 33.9 \\
EP & 0.0 \\
FT & 47.5 \\
IS & 364.0 \\
LU & 1958.4 \\
MG & 4.8 \\
\bottomrule
\end{tabular}
\end{table}
\begin{table}
\centering
\caption{初期変数を含めずに構築したモデルの平均MAPE}
\label{AllWithoutInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 418.4 \\
EP & 0.0 \\
FT & 46.0 \\
IS & 176.9 \\
LU & 2123.8 \\
MG & 12.3 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/164478607.py:63: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. /tmp/ipykernel_137/164478607.py:71: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
# Settings for the run that adds the combined linear/logarithmic model.
resultsList = list()
# Fix the core count at 128 and sweep the problem size
classes = list("ABCDEF")
processes = [128]
# Maps the column names of the MAPE table to their Japanese display names
dictColumnNamesJP = dict(
    functionName="関数名",
    modelLin="線形モデル",
    modelIp="反比例モデル",
    modelLog="対数モデル",
    modelLinAndLog="線形および対数モデル",
)
# Every key except the function-name column is a model name
model_names_list = [key for key in dictColumnNamesJP if key != "functionName"]
print(f"model_names_list={model_names_list}")
date = "2022年1月3日"
model_names_list=['modelLin', 'modelIp', 'modelLog', 'modelLinAndLog']
# Vary both the core count and the problem size.
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Directory holding the raw CSV files. Assigned before the loop so this cell
# no longer depends on an earlier cell having defined `csvDirPath`.
csvDirPath = "./csv_files/"

# MAPE tables per benchmark, keyed by the upper-cased benchmark name.
resultDictAllWithInit = {}
resultDictAllWithoutInit = {}
for benchmarkName in benchmarkNames:
    print(f"% _____benchmarkName={benchmarkName}_____")

    # Models built without the initial variables
    # (baseExpVar=True, initExpVar=False).
    expVarBase = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=True,
        initExpVar=False,
    )
    dfBase = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarBase,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    dfBase = dfBase.rename(columns=dictColumnNamesJP).set_index("関数名")
    resultDictAllWithoutInit[benchmarkName.upper()] = addLowestMAPEColumn(dfBase)

    # Models built with the initial variables
    # (baseExpVar=False, initExpVar=True).
    expVarInit = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=False,
        initExpVar=True,
    )
    # NOTE(review): `modelNames` was missing here, so this call fell back to
    # the function's default model set while the base call above used
    # `model_names_list`. It is now passed so both tables are built from the
    # same models -- confirm this matches the intended comparison.
    dfInit = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarInit,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    dfInit = dfInit.rename(columns=dictColumnNamesJP).set_index("関数名")
    resultDictAllWithInit[benchmarkName.upper()] = addLowestMAPEColumn(dfInit)

test_returnExplanatoryVariablesList(csvDirPath=csvDirPath)

# Summary for the models built WITH the initial variables: print it as a
# LaTeX table and keep an independent copy in `resultsList`.
dfAverageWithInit = returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithInit)
print(
    dfAverageWithInit.to_latex(
        caption="初期変数を含めて構築したモデルの平均MAPE", label="AllWithInit"
    )
)
# `.copy()` has to be *called*; appending the bare `.copy` attribute stored a
# bound method in `resultsList` instead of a DataFrame.
resultsList.append(dfAverageWithInit.copy())

# Same summary for the models built WITHOUT the initial variables.
dfAverageWithoutInit = returnDFBenchmarkNameAndAverageLowestMAPE(
    resultDictAllWithoutInit
)
print(
    dfAverageWithoutInit.to_latex(
        caption="初期変数を含めずに構築したモデルの平均MAPE", label="AllWithoutInit"
    )
)
resultsList.append(dfAverageWithoutInit.copy())
% _____benchmarkName=cg_____ % _____benchmarkName=ep_____ % _____benchmarkName=ft_____ % _____benchmarkName=is_____ % _____benchmarkName=lu_____ % _____benchmarkName=mg_____
0
\begin{table}
\centering
\caption{初期変数を含めて構築したモデルの平均MAPE}
\label{AllWithInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 33.9 \\
EP & 0.0 \\
FT & 47.5 \\
IS & 364.0 \\
LU & 1958.4 \\
MG & 4.8 \\
\bottomrule
\end{tabular}
\end{table}
\begin{table}
\centering
\caption{初期変数を含めずに構築したモデルの平均MAPE}
\label{AllWithoutInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 418.4 \\
EP & 0.0 \\
FT & 243.4 \\
IS & 424.4 \\
LU & 2453.1 \\
MG & 11.5 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/164478607.py:63: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. /tmp/ipykernel_137/164478607.py:71: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
# Collects the summary tables appended by the cell that follows.
resultsList = list()
# Fix the core count at 128 and vary the problem size.
classes = list("ABCDEF")
processes = [128]
# Internal column/model names mapped to the Japanese labels used in the tables.
dictColumnNamesJP = dict(
    functionName="関数名",
    modelLin="線形モデル",
    modelIp="反比例モデル",
    modelLog="対数モデル",
    modelIpAndLog="反比例および対数モデル",
)
# Every key except the function-name column is a model name.
model_names_list = [key for key in dictColumnNamesJP if key != "functionName"]
print("model_names_list={}".format(model_names_list))
date = "2022年1月3日"
model_names_list=['modelLin', 'modelIp', 'modelLog', 'modelIpAndLog']
# Vary both the core count and the problem size.
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Directory holding the raw CSV files. Assigned before the loop so this cell
# no longer depends on an earlier cell having defined `csvDirPath`.
csvDirPath = "./csv_files/"

# MAPE tables per benchmark, keyed by the upper-cased benchmark name.
resultDictAllWithInit = {}
resultDictAllWithoutInit = {}
for benchmarkName in benchmarkNames:
    print(f"% _____benchmarkName={benchmarkName}_____")

    # Models built without the initial variables
    # (baseExpVar=True, initExpVar=False).
    expVarBase = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=True,
        initExpVar=False,
    )
    dfBase = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarBase,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    dfBase = dfBase.rename(columns=dictColumnNamesJP).set_index("関数名")
    resultDictAllWithoutInit[benchmarkName.upper()] = addLowestMAPEColumn(dfBase)

    # Models built with the initial variables
    # (baseExpVar=False, initExpVar=True).
    expVarInit = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=False,
        initExpVar=True,
    )
    # NOTE(review): `modelNames` was missing here, so this call fell back to
    # the function's default model set while the base call above used
    # `model_names_list`. It is now passed so both tables are built from the
    # same models -- confirm this matches the intended comparison.
    dfInit = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarInit,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    dfInit = dfInit.rename(columns=dictColumnNamesJP).set_index("関数名")
    resultDictAllWithInit[benchmarkName.upper()] = addLowestMAPEColumn(dfInit)

test_returnExplanatoryVariablesList(csvDirPath=csvDirPath)

# Summary for the models built WITH the initial variables: print it as a
# LaTeX table and keep an independent copy in `resultsList`.
dfAverageWithInit = returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithInit)
print(
    dfAverageWithInit.to_latex(
        caption="初期変数を含めて構築したモデルの平均MAPE", label="AllWithInit"
    )
)
# `.copy()` has to be *called*; appending the bare `.copy` attribute stored a
# bound method in `resultsList` instead of a DataFrame.
resultsList.append(dfAverageWithInit.copy())

# Same summary for the models built WITHOUT the initial variables.
dfAverageWithoutInit = returnDFBenchmarkNameAndAverageLowestMAPE(
    resultDictAllWithoutInit
)
print(
    dfAverageWithoutInit.to_latex(
        caption="初期変数を含めずに構築したモデルの平均MAPE", label="AllWithoutInit"
    )
)
resultsList.append(dfAverageWithoutInit.copy())
% _____benchmarkName=cg_____ % _____benchmarkName=ep_____ % _____benchmarkName=ft_____ % _____benchmarkName=is_____ % _____benchmarkName=lu_____ % _____benchmarkName=mg_____
0
\begin{table}
\centering
\caption{初期変数を含めて構築したモデルの平均MAPE}
\label{AllWithInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 33.9 \\
EP & 0.0 \\
FT & 47.5 \\
IS & 364.0 \\
LU & 1958.4 \\
MG & 4.8 \\
\bottomrule
\end{tabular}
\end{table}
\begin{table}
\centering
\caption{初期変数を含めずに構築したモデルの平均MAPE}
\label{AllWithoutInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 418.4 \\
EP & 0.0 \\
FT & 243.4 \\
IS & 424.4 \\
LU & 2350.1 \\
MG & 12.0 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/164478607.py:63: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. /tmp/ipykernel_137/164478607.py:71: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
# Collects the summary tables appended by the cell that follows.
resultsList = list()
# Fix the core count at 128 and vary the problem size.
classes = list("ABCDEF")
processes = [128]
# Internal column/model names mapped to the Japanese labels used in the tables.
dictColumnNamesJP = dict(
    functionName="関数名",
    modelLin="線形モデル",
    modelIp="反比例モデル",
    modelLog="対数モデル",
    modelLogAndLin="対数および線形モデル",
)
# Every key except the function-name column is a model name.
model_names_list = [key for key in dictColumnNamesJP if key != "functionName"]
print("model_names_list={}".format(model_names_list))
date = "2022年1月3日"
model_names_list=['modelLin', 'modelIp', 'modelLog', 'modelLogAndLin']
# Vary both the core count and the problem size.
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Directory holding the raw CSV files. Assigned before the loop so this cell
# no longer depends on an earlier cell having defined `csvDirPath`.
csvDirPath = "./csv_files/"

# MAPE tables per benchmark, keyed by the upper-cased benchmark name.
resultDictAllWithInit = {}
resultDictAllWithoutInit = {}
for benchmarkName in benchmarkNames:
    print(f"% _____benchmarkName={benchmarkName}_____")

    # Models built without the initial variables
    # (baseExpVar=True, initExpVar=False).
    expVarBase = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=True,
        initExpVar=False,
    )
    dfBase = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarBase,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    dfBase = dfBase.rename(columns=dictColumnNamesJP).set_index("関数名")
    resultDictAllWithoutInit[benchmarkName.upper()] = addLowestMAPEColumn(dfBase)

    # Models built with the initial variables
    # (baseExpVar=False, initExpVar=True).
    expVarInit = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=False,
        initExpVar=True,
    )
    # NOTE(review): `modelNames` was missing here, so this call fell back to
    # the function's default model set while the base call above used
    # `model_names_list`. It is now passed so both tables are built from the
    # same models -- confirm this matches the intended comparison.
    dfInit = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarInit,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    dfInit = dfInit.rename(columns=dictColumnNamesJP).set_index("関数名")
    resultDictAllWithInit[benchmarkName.upper()] = addLowestMAPEColumn(dfInit)

test_returnExplanatoryVariablesList(csvDirPath=csvDirPath)

# Summary for the models built WITH the initial variables: print it as a
# LaTeX table and keep an independent copy in `resultsList`.
dfAverageWithInit = returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithInit)
print(
    dfAverageWithInit.to_latex(
        caption="初期変数を含めて構築したモデルの平均MAPE", label="AllWithInit"
    )
)
# `.copy()` has to be *called*; appending the bare `.copy` attribute stored a
# bound method in `resultsList` instead of a DataFrame.
resultsList.append(dfAverageWithInit.copy())

# Same summary for the models built WITHOUT the initial variables.
dfAverageWithoutInit = returnDFBenchmarkNameAndAverageLowestMAPE(
    resultDictAllWithoutInit
)
print(
    dfAverageWithoutInit.to_latex(
        caption="初期変数を含めずに構築したモデルの平均MAPE", label="AllWithoutInit"
    )
)
resultsList.append(dfAverageWithoutInit.copy())
% _____benchmarkName=cg_____ % _____benchmarkName=ep_____ % _____benchmarkName=ft_____ % _____benchmarkName=is_____ % _____benchmarkName=lu_____ % _____benchmarkName=mg_____
0
\begin{table}
\centering
\caption{初期変数を含めて構築したモデルの平均MAPE}
\label{AllWithInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 33.9 \\
EP & 0.0 \\
FT & 47.5 \\
IS & 364.0 \\
LU & 1958.4 \\
MG & 4.8 \\
\bottomrule
\end{tabular}
\end{table}
\begin{table}
\centering
\caption{初期変数を含めずに構築したモデルの平均MAPE}
\label{AllWithoutInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 388.0 \\
EP & 0.0 \\
FT & 182.1 \\
IS & 257.3 \\
LU & 2382.0 \\
MG & 12.3 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/164478607.py:63: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. /tmp/ipykernel_137/164478607.py:71: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
# Collects the summary tables appended by the cell that follows.
resultsList = list()
# Fix the core count at 128 and vary the problem size.
classes = list("ABCDEF")
processes = [128]
# Internal column/model names mapped to the Japanese labels used in the tables.
dictColumnNamesJP = dict(
    functionName="関数名",
    modelLin="線形モデル",
    modelIp="反比例モデル",
    modelLog="対数モデル",
    modelLogAndIp="対数および反比例モデル",
)
# Every key except the function-name column is a model name.
model_names_list = [key for key in dictColumnNamesJP if key != "functionName"]
print("model_names_list={}".format(model_names_list))
date = "2022年1月3日"
model_names_list=['modelLin', 'modelIp', 'modelLog', 'modelLogAndIp']
# Vary both the core count and the problem size.
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Directory holding the raw CSV files. Assigned before the loop so this cell
# no longer depends on an earlier cell having defined `csvDirPath`.
csvDirPath = "./csv_files/"

# MAPE tables per benchmark, keyed by the upper-cased benchmark name.
resultDictAllWithInit = {}
resultDictAllWithoutInit = {}
for benchmarkName in benchmarkNames:
    print(f"% _____benchmarkName={benchmarkName}_____")

    # Models built without the initial variables
    # (baseExpVar=True, initExpVar=False).
    expVarBase = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=True,
        initExpVar=False,
    )
    dfBase = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarBase,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    dfBase = dfBase.rename(columns=dictColumnNamesJP).set_index("関数名")
    resultDictAllWithoutInit[benchmarkName.upper()] = addLowestMAPEColumn(dfBase)

    # Models built with the initial variables
    # (baseExpVar=False, initExpVar=True).
    expVarInit = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=False,
        initExpVar=True,
    )
    # NOTE(review): `modelNames` was missing here, so this call fell back to
    # the function's default model set while the base call above used
    # `model_names_list`. It is now passed so both tables are built from the
    # same models -- confirm this matches the intended comparison.
    dfInit = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVarInit,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    dfInit = dfInit.rename(columns=dictColumnNamesJP).set_index("関数名")
    resultDictAllWithInit[benchmarkName.upper()] = addLowestMAPEColumn(dfInit)

test_returnExplanatoryVariablesList(csvDirPath=csvDirPath)

# Summary for the models built WITH the initial variables: print it as a
# LaTeX table and keep an independent copy in `resultsList`.
dfAverageWithInit = returnDFBenchmarkNameAndAverageLowestMAPE(resultDictAllWithInit)
print(
    dfAverageWithInit.to_latex(
        caption="初期変数を含めて構築したモデルの平均MAPE", label="AllWithInit"
    )
)
# `.copy()` has to be *called*; appending the bare `.copy` attribute stored a
# bound method in `resultsList` instead of a DataFrame.
resultsList.append(dfAverageWithInit.copy())

# Same summary for the models built WITHOUT the initial variables.
dfAverageWithoutInit = returnDFBenchmarkNameAndAverageLowestMAPE(
    resultDictAllWithoutInit
)
print(
    dfAverageWithoutInit.to_latex(
        caption="初期変数を含めずに構築したモデルの平均MAPE", label="AllWithoutInit"
    )
)
resultsList.append(dfAverageWithoutInit.copy())
% _____benchmarkName=cg_____ % _____benchmarkName=ep_____ % _____benchmarkName=ft_____ % _____benchmarkName=is_____ % _____benchmarkName=lu_____ % _____benchmarkName=mg_____
0
\begin{table}
\centering
\caption{初期変数を含めて構築したモデルの平均MAPE}
\label{AllWithInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 33.9 \\
EP & 0.0 \\
FT & 47.5 \\
IS & 364.0 \\
LU & 1958.4 \\
MG & 4.8 \\
\bottomrule
\end{tabular}
\end{table}
\begin{table}
\centering
\caption{初期変数を含めずに構築したモデルの平均MAPE}
\label{AllWithoutInit}
\begin{tabular}{lr}
\toprule
{} & 平均絶対誤差 \\
\midrule
CG & 418.2 \\
EP & 0.0 \\
FT & 243.4 \\
IS & 424.4 \\
LU & 775.6 \\
MG & 12.3 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/164478607.py:63: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. /tmp/ipykernel_137/164478607.py:71: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
# IPython magic: reset the interactive namespace; -f skips the confirmation prompt.
%reset -f
# Import the library that is kept in ipynb (notebook) format.
%run ./lib/lib.ipynb
# Directory that holds the CSV files containing the raw data.
csvDirPath = "./csv_files/"
# Benchmark names iterated over by cells that loop on `benchmarkNames`.
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
# Target benchmark plus the problem sizes (classes) and core counts to sweep.
benchmark_name = "cg"
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Internal column/model names mapped to the Japanese labels used in the table.
dict_column_names_JP = dict(
    functionName="関数名",
    modelLin="線形",
    modelIp="反比例",
    modelLog="対数",
    modelLinAndIp="線形&反比例",
    modelLinAndLog="線形&対数",
    modelIpAndLin="反比例&線形",
    modelIpAndLog="反比例&対数",
    modelLogAndLin="対数&線形",
    modelLogAndIp="対数&反比例",
)
# Model names (Japanese labels and internal keys) without the function-name entry.
model_names_list_JP = [
    label for label in dict_column_names_JP.values() if label != "関数名"
]
model_names_list = [name for name in dict_column_names_JP if name != "functionName"]
date = "2022年2月2日"

# Vary both the core count and the problem size.
rawDF = return_rawDF_with_init_param(
    csv_dir_path="./csv_files/",
    processes=processes,
    classes=classes,
    benchmark_name=benchmark_name,
)

# Explanatory variables: every raw column except the ones removed below.
exp_var = rawDF.columns.tolist()
for element_be_removed in (
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
):
    exp_var.remove(element_be_removed)
# Response variable.
res_var = ["functionCallNum"]

returned_MAPE_table = return_MAPE_Table_DF_from_rawDF(
    model_name_list=model_names_list,
    res_var_list=res_var,
    exp_var_list=exp_var,
    rawDF=rawDF,
)
# Japanese column labels, indexed by function name, rows then columns sorted.
df = (
    returned_MAPE_table.rename(columns=dict_column_names_JP)
    .set_index("関数名")
    .sort_index(axis=0)
    .sort_index(axis=1)
)
df = addLowestMAPEsModelNameColumn(df, model_name_list=model_names_list_JP, version=2)
print(
    df.to_latex(
        label=f"{date}everyModelsMAPEtableIn{benchmark_name.upper()}",
        caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
    )
)
\begin{table}
\centering
\caption{ベンチマークプログラムCGにおける各モデルでのMAPE}
\label{2022年2月2日everyModelsMAPEtableInCG}
\begin{tabular}{lrrrrrrrrrl}
\toprule
{} & 反比例 & 反比例&対数 & 反比例&線形 & 対数 & 対数&反比例 & 対数&線形 & 線形 & 線形&反比例 & 線形&対数 & 最適モデル \\
関数名 & & & & & & & & & & \\
\midrule
.TAU\_application & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
ALLOC\_SPACE & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
CG & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
CONJ\_GRAD & 13.876670 & 13.732936 & 14.498431 & 14.804389 & 14.717362 & 15.248570 & 13.161174 & 12.412559 & 12.755412 & 線形&反比例 \\
ICNVRT & 662.481358 & 2504.274677 & 894.574028 & 2443.699386 & 3811.490854 & 815.789372 & 502.123230 & 6621.011052 & 2283.060136 & 線形 \\
INITIALIZE\_MPI & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MAKEA & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Barrier() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Bcast() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Comm\_rank() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Comm\_size() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Finalize() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Init() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Irecv() & 38.629896 & 38.552817 & 39.029932 & 35.489902 & 35.487038 & 35.954193 & 34.989183 & 35.515528 & 35.660459 & 線形 \\
MPI\_Reduce() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Send() & 38.629896 & 38.552817 & 39.029932 & 35.489902 & 35.487038 & 35.954193 & 34.989183 & 35.515528 & 35.660459 & 線形 \\
MPI\_Wait() & 38.629896 & 38.552817 & 39.029932 & 35.489902 & 35.487038 & 35.954193 & 34.989183 & 35.515528 & 35.660459 & 線形 \\
SETUP\_PROC\_INFO & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SETUP\_SUBMATRIX\_INFO & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SPARSE & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SPRNVC & 336.648986 & 1321.895548 & 345.739861 & 1306.978016 & 1503.097645 & 338.951990 & 198.005854 & 2573.964383 & 1221.597908 & 線形 \\
VECSET & 336.648986 & 1321.895548 & 345.739861 & 1306.978016 & 1503.097645 & 338.951990 & 198.005854 & 2573.964383 & 1221.597908 & 線形 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/2558574860.py:61: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
モデルA,モデルB,モデルCがあるとすると、組み合わせモデル「モデルA&モデルB」および「モデルA&モデルC」のMAPEがモデルAのMAPEと同じになっている。 これは「モデルA&モデルX」だけで起きているのではなく「モデルB&モデルX」,「モデルC&モデルX」でも起きている。
説明変数から intBenchmarkClass を削除する前
# Target benchmark plus the problem sizes (classes) and core counts to sweep.
benchmark_name = "cg"
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Internal column/model names mapped to the Japanese labels used in the table.
dict_column_names_JP = dict(
    functionName="関数名",
    modelLin="線形",
    modelIp="反比例",
    modelLog="対数",
    modelLinAndIp="線形&反比例",
    modelLinAndLog="線形&対数",
    modelIpAndLin="反比例&線形",
    modelIpAndLog="反比例&対数",
    modelLogAndLin="対数&線形",
    modelLogAndIp="対数&反比例",
)
# Model names (Japanese labels and internal keys) without the function-name entry.
model_names_list_JP = [
    label for label in dict_column_names_JP.values() if label != "関数名"
]
model_names_list = [name for name in dict_column_names_JP if name != "functionName"]
date = "2022年2月2日"

# Vary both the core count and the problem size.
rawDF = return_rawDF_with_init_param(
    csv_dir_path="./csv_files/",
    processes=processes,
    classes=classes,
    benchmark_name=benchmark_name,
)

# Candidate functions; only the first one is examined here.
target_function_names = ["CONJ_GRAD", "ICNVRT", "MPI_Irecv()", "SPRNVC", "VECSET"]
target_function = target_function_names[0]
# Keep only the rows that belong to the selected function.
target_function_rawDF = rawDF[rawDF["functionName"] == target_function]

# Explanatory variables: every column except the ones removed below.
# "intBenchmarkClass" is deliberately NOT removed in this cell.
exp_var = target_function_rawDF.columns.tolist()
for element_be_removed in (
    "functionName",
    "functionCallNum",
    "benchmarkName",
    "benchmarkClass",
):
    exp_var.remove(element_be_removed)
# Response variable.
res_var = ["functionCallNum"]

returned_MAPE_table = return_MAPE_Table_DF_from_rawDF(
    model_name_list=model_names_list,
    res_var_list=res_var,
    exp_var_list=exp_var,
    rawDF=target_function_rawDF,
)
# Japanese column labels, indexed by function name, rows then columns sorted.
df = (
    returned_MAPE_table.rename(columns=dict_column_names_JP)
    .set_index("関数名")
    .sort_index(axis=0)
    .sort_index(axis=1)
)
df = addLowestMAPEsModelNameColumn(df, model_name_list=model_names_list_JP, version=2)
print(
    df.to_latex(
        label=f"{date}everyModelsMAPEtableIn{benchmark_name.upper()}",
        caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
    )
)
\begin{table}
\centering
\caption{ベンチマークプログラムCGにおける各モデルでのMAPE}
\label{2022年2月2日everyModelsMAPEtableInCG}
\begin{tabular}{lrrrrrrrrrl}
\toprule
{} & 反比例 & 反比例&対数 & 反比例&線形 & 対数 & 対数&反比例 & 対数&線形 & 線形 & 線形&反比例 & 線形&対数 & 最適モデル \\
関数名 & & & & & & & & & & \\
\midrule
CONJ\_GRAD & 13.870213 & 13.870213 & 13.870213 & 14.691927 & 14.691927 & 14.691927 & 12.4383 & 12.4383 & 12.4383 & 線形 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/4085886897.py:68: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
説明変数から intBenchmarkClass を削除した後
# Target benchmark plus the problem sizes (classes) and core counts to sweep.
benchmark_name = "cg"
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Internal column/model names mapped to the Japanese labels used in the table.
dict_column_names_JP = dict(
    functionName="関数名",
    modelLin="線形",
    modelIp="反比例",
    modelLog="対数",
    modelLinAndIp="線形&反比例",
    modelLinAndLog="線形&対数",
    modelIpAndLin="反比例&線形",
    modelIpAndLog="反比例&対数",
    modelLogAndLin="対数&線形",
    modelLogAndIp="対数&反比例",
)
# Model names (Japanese labels and internal keys) without the function-name entry.
model_names_list_JP = [
    label for label in dict_column_names_JP.values() if label != "関数名"
]
model_names_list = [name for name in dict_column_names_JP if name != "functionName"]
date = "2022年2月2日"

# Vary both the core count and the problem size.
rawDF = return_rawDF_with_init_param(
    csv_dir_path="./csv_files/",
    processes=processes,
    classes=classes,
    benchmark_name=benchmark_name,
)

# Candidate functions; only the first one is examined here.
target_function_names = ["CONJ_GRAD", "ICNVRT", "MPI_Irecv()", "SPRNVC", "VECSET"]
target_function = target_function_names[0]
# Keep only the rows that belong to the selected function.
target_function_rawDF = rawDF[rawDF["functionName"] == target_function]

# Explanatory variables: every column except the ones removed below
# (this cell also removes "intBenchmarkClass").
exp_var = target_function_rawDF.columns.tolist()
for element_be_removed in (
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
):
    exp_var.remove(element_be_removed)
# Response variable.
res_var = ["functionCallNum"]

returned_MAPE_table = return_MAPE_Table_DF_from_rawDF(
    model_name_list=model_names_list,
    res_var_list=res_var,
    exp_var_list=exp_var,
    rawDF=target_function_rawDF,
)
# Japanese column labels, indexed by function name, rows then columns sorted.
df = (
    returned_MAPE_table.rename(columns=dict_column_names_JP)
    .set_index("関数名")
    .sort_index(axis=0)
    .sort_index(axis=1)
)
df = addLowestMAPEsModelNameColumn(df, model_name_list=model_names_list_JP, version=2)
print(
    df.to_latex(
        label=f"{date}everyModelsMAPEtableIn{benchmark_name.upper()}",
        caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
    )
)
\begin{table}
\centering
\caption{ベンチマークプログラムCGにおける各モデルでのMAPE}
\label{2022年2月2日everyModelsMAPEtableInCG}
\begin{tabular}{lrrrrrrrrrl}
\toprule
{} & 反比例 & 反比例&対数 & 反比例&線形 & 対数 & 対数&反比例 & 対数&線形 & 線形 & 線形&反比例 & 線形&対数 & 最適モデル \\
関数名 & & & & & & & & & & \\
\midrule
CONJ\_GRAD & 13.87667 & 13.732936 & 14.498431 & 14.804389 & 14.717362 & 15.24857 & 13.161174 & 12.412559 & 12.755412 & 線形&反比例 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/3061610140.py:68: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
# Target benchmark plus the problem sizes (classes) and core counts to sweep.
benchmark_name = "cg"
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Internal column/model names mapped to the Japanese labels used in the table.
dict_column_names_JP = dict(
    functionName="関数名",
    modelLin="線形",
    modelIp="反比例",
    modelLog="対数",
    modelLinAndIp="線形&反比例",
    modelLinAndLog="線形&対数",
    modelIpAndLin="反比例&線形",
    modelIpAndLog="反比例&対数",
    modelLogAndLin="対数&線形",
    modelLogAndIp="対数&反比例",
)
# Model names (Japanese labels and internal keys) without the function-name entry.
model_names_list_JP = [
    label for label in dict_column_names_JP.values() if label != "関数名"
]
model_names_list = [name for name in dict_column_names_JP if name != "functionName"]
date = "2022年2月2日"

# Vary both the core count and the problem size.
rawDF = return_rawDF_with_init_param(
    csv_dir_path="./csv_files/",
    processes=processes,
    classes=classes,
    benchmark_name=benchmark_name,
)

# Explanatory variables: every raw column except the four removed below
# ("intBenchmarkClass" stays in the list in this cell).
exp_var = rawDF.columns.tolist()
for element_be_removed in (
    "functionName",
    "functionCallNum",
    "benchmarkName",
    "benchmarkClass",
):
    exp_var.remove(element_be_removed)
# Response variable.
res_var = ["functionCallNum"]

returned_MAPE_table = return_MAPE_Table_DF_from_rawDF(
    model_name_list=model_names_list,
    res_var_list=res_var,
    exp_var_list=exp_var,
    rawDF=rawDF,
)
# Japanese column labels, indexed by function name, rows then columns sorted.
df = (
    returned_MAPE_table.rename(columns=dict_column_names_JP)
    .set_index("関数名")
    .sort_index(axis=0)
    .sort_index(axis=1)
)
df = addLowestMAPEsModelNameColumn(df, model_name_list=model_names_list_JP, version=2)
print(
    df.to_latex(
        label=f"{date}everyModelsMAPEtableIn{benchmark_name.upper()}",
        caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
    )
)
\begin{table}
\centering
\caption{ベンチマークプログラムCGにおける各モデルでのMAPE}
\label{2022年2月2日everyModelsMAPEtableInCG}
\begin{tabular}{lrrrrrrrrrl}
\toprule
{} & 反比例 & 反比例&対数 & 反比例&線形 & 対数 & 対数&反比例 & 対数&線形 & 線形 & 線形&反比例 & 線形&対数 & 最適モデル \\
関数名 & & & & & & & & & & \\
\midrule
.TAU\_application & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
ALLOC\_SPACE & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
CG & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
CONJ\_GRAD & 13.870213 & 13.870213 & 13.870213 & 14.691927 & 14.691927 & 14.691927 & 12.438300 & 12.438300 & 12.438300 & 線形 \\
ICNVRT & 670.402547 & 670.402550 & 670.402550 & 687.836098 & 687.836096 & 687.836098 & 353.729909 & 353.729892 & 353.729909 & 線形&反比例 \\
INITIALIZE\_MPI & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MAKEA & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Barrier() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Bcast() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Comm\_rank() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Comm\_size() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Finalize() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Init() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Irecv() & 38.397500 & 38.397500 & 38.397500 & 35.460732 & 35.460732 & 35.460732 & 35.678469 & 35.678469 & 35.678469 & 対数&反比例 \\
MPI\_Reduce() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI\_Send() & 38.397500 & 38.397500 & 38.397500 & 35.460732 & 35.460732 & 35.460732 & 35.678469 & 35.678469 & 35.678469 & 対数&反比例 \\
MPI\_Wait() & 38.397500 & 38.397500 & 38.397500 & 35.460732 & 35.460732 & 35.460732 & 35.678469 & 35.678469 & 35.678469 & 対数&反比例 \\
SETUP\_PROC\_INFO & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SETUP\_SUBMATRIX\_INFO & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SPARSE & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SPRNVC & 314.849043 & 314.849045 & 314.849045 & 329.372948 & 329.372947 & 329.372948 & 184.309226 & 184.309220 & 184.309226 & 線形&反比例 \\
VECSET & 314.849043 & 314.849045 & 314.849045 & 329.372948 & 329.372947 & 329.372948 & 184.309226 & 184.309220 & 184.309226 & 線形&反比例 \\
\bottomrule
\end{tabular}
\end{table}
/tmp/ipykernel_137/3275562520.py:60: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.
returnDictAboutMAPETable() の問題に伴い、最適モデルの組み合わせをまとめた表の結果が怪しい。修正する。
# Target benchmark, the single class / core count used when one axis is fixed,
# and the full sweeps used for the other axis.
benchmark_name = "cg"
fix_class = ["B"]
fix_process = [128]
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Internal column/model names mapped to the Japanese labels used in the table.
dict_column_names_JP = dict(
    functionName="関数名",
    modelLin="線形モデル",
    modelIp="反比例モデル",
    modelLog="対数モデル",
)
model_names_list_JP = [
    label for label in dict_column_names_JP.values() if label != "関数名"
]
model_names_list = [name for name in dict_column_names_JP if name != "functionName"]
# Column renames that tell the two "best model" columns apart after merging.
dict_column_fix_process = {"最適モデル": "最適モデル(コア数固定)"}
dict_column_fix_core = {"最適モデル": "最適モデル(問題サイズ固定)"}
date = "2022年1月22日"

# Fix the problem size and vary the core count.
fix_class_rawDF = return_rawDF_with_init_param(
    csv_dir_path="./csv_files/",
    processes=processes,
    classes=fix_class,
    benchmark_name=benchmark_name,
)
# Explanatory variables: every raw column except the four removed below.
exp_var = fix_class_rawDF.columns.tolist()
for element_be_removed in (
    "functionName",
    "functionCallNum",
    "benchmarkName",
    "benchmarkClass",
):
    exp_var.remove(element_be_removed)
# Response variable.
res_var = ["functionCallNum"]
fix_class_MAPE_table = return_MAPE_Table_DF_from_rawDF(
    model_name_list=model_names_list,
    res_var_list=res_var,
    exp_var_list=exp_var,
    rawDF=fix_class_rawDF,
)

# Fix the core count and vary the problem size.
fix_process_rawDF = return_rawDF_with_init_param(
    csv_dir_path="./csv_files/",
    processes=fix_process,
    classes=classes,
    benchmark_name=benchmark_name,
)
fix_process_MAPE_table = return_MAPE_Table_DF_from_rawDF(
    model_name_list=model_names_list,
    res_var_list=res_var,
    exp_var_list=exp_var,
    rawDF=fix_process_rawDF,
)

# Add the lowest-MAPE model-name column to both MAPE tables (fixed problem
# size / fixed core count), then index them by function name.
fix_process_MAPE_table = addLowestMAPEsModelNameColumn(
    fix_process_MAPE_table, version=2, model_name_list=model_names_list
).set_index("functionName")
fix_class_MAPE_table = addLowestMAPEsModelNameColumn(
    fix_class_MAPE_table, version=2, model_name_list=model_names_list
).set_index("functionName")

# Keep only the best-model column of each table, rename it per condition and
# replace the internal model names with their Japanese labels.
fix_process_MAPE_table_bestModel = fix_process_MAPE_table.loc[:, ["最適モデル"]]
fix_process_MAPE_table_bestModel = fix_process_MAPE_table_bestModel.rename(
    columns=dict_column_fix_process
)
fix_process_MAPE_table_bestModel = fix_process_MAPE_table_bestModel.replace(
    dict_column_names_JP
)
fix_core_MAPE_table_bestModel = fix_class_MAPE_table.loc[:, ["最適モデル"]]
fix_core_MAPE_table_bestModel = fix_core_MAPE_table_bestModel.rename(
    columns=dict_column_fix_core
)
fix_core_MAPE_table_bestModel = fix_core_MAPE_table_bestModel.replace(
    dict_column_names_JP
)

# Outer-join the two columns on the function-name index and print as LaTeX.
print(
    fix_core_MAPE_table_bestModel.merge(
        fix_process_MAPE_table_bestModel,
        how="outer",
        left_index=True,
        right_index=True,
    )
    .sort_index()
    .style.to_latex(
        caption=f"ベンチマークプログラム{benchmark_name.upper()}における最適モデル",
        label=f"{date}bestModelCombinationIn{benchmark_name.upper()}",
    )
)
\begin{table}
\caption{ベンチマークプログラムCGにおける最適モデル}
\label{2022年1月22日bestModelCombinationInCG}
\begin{tabular}{lll}
& 最適モデル(問題サイズ固定) & 最適モデル(コア数固定) \\
functionName & & \\
.TAU_application & 線形モデル & 線形モデル \\
ALLOC_SPACE & 線形モデル & 線形モデル \\
CG & 線形モデル & 線形モデル \\
CONJ_GRAD & 反比例モデル & 対数モデル \\
ICNVRT & 反比例モデル & 線形モデル \\
INITIALIZE_MPI & 線形モデル & 線形モデル \\
MAKEA & 線形モデル & 線形モデル \\
MPI_Barrier() & 線形モデル & 線形モデル \\
MPI_Bcast() & 線形モデル & 線形モデル \\
MPI_Comm_rank() & 線形モデル & 線形モデル \\
MPI_Comm_size() & 線形モデル & 線形モデル \\
MPI_Finalize() & 線形モデル & 線形モデル \\
MPI_Init() & 線形モデル & 線形モデル \\
MPI_Irecv() & 対数モデル & 対数モデル \\
MPI_Reduce() & 線形モデル & 線形モデル \\
MPI_Send() & 対数モデル & 対数モデル \\
MPI_Wait() & 対数モデル & 対数モデル \\
SETUP_PROC_INFO & 線形モデル & 線形モデル \\
SETUP_SUBMATRIX_INFO & 線形モデル & 線形モデル \\
SPARSE & 線形モデル & 線形モデル \\
SPRNVC & 反比例モデル & 線形モデル \\
VECSET & 反比例モデル & 線形モデル \\
\end{tabular}
\end{table}
returnDictAboutMAPETable()は生データを取得する機能とMAPE表を作成する機能がまとまっている。
returnDictAboutMAPETable()における引数のうち、生データの取得のみに必要な引数は除く。問題サイズ固定時とコア数固定時での最適モデルから予測される組み合わせモデルが、実際に選択される組み合わせモデルと異なる。したがって、その原因がプログラムの実装ミスに由来するものなのかどうかを検証する。
やること(TODO)
# Recompute the MAPE of every single and combined model for the four CG
# functions whose best-model combination looked suspicious, and print the
# result as a LaTeX table.
resultsDict = {}
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128]
benchmark_name = "cg"
# Internal column/model names -> Japanese display names used in the table.
dictColumnNamesJP = {
    "functionName": "関数名",
    "modelLin": "線形",
    "modelLinAndIp": "線形&反比例",
    "modelLinAndLog": "線形&対数",
    "modelIp": "反比例",
    "modelIpAndLin": "反比例&線形",
    "modelIpAndLog": "反比例&対数",
    "modelLog": "対数",
    "modelLogAndLin": "対数&線形",
    "modelLogAndIp": "対数&反比例",
}
model_names_list_jp = list(dictColumnNamesJP.values())
model_names_list_jp.remove("関数名")
model_names_list = list(dictColumnNamesJP.keys())
model_names_list.remove("functionName")
date = "2022年1月15日"
target_function_names = ["CONJ_GRAD", "ICNVRT", "SPRNVC", "VECSET"]
expVar = returnExplanatoryVariablesList(
    benchmarkName=benchmark_name,
    classes=classes,
    processes=processes,
    csvDirPath=csvDirPath,
    baseExpVar=True,
    initExpVar=True,
)
rawDF = returnCollectedExistingData(
    benchmarkNames=[benchmark_name],
    classes=classes,
    processes=processes,
    csvDirPath=csvDirPath,
)
# Keep only the target functions. The explicit copy makes the column
# assignment below write to an independent frame instead of a slice of rawDF
# (the original triggered SettingWithCopyWarning here).
target_functions_rawDF = rawDF[
    rawDF["functionName"].isin(target_function_names)
].copy()
# Add a numeric problem-size column to use as an explanatory variable.
strListProblemSize = target_functions_rawDF["benchmarkClass"].tolist()
intListProblemSize = convertBenchmarkClasses_problemSizeInNPB(
    inputList=strListProblemSize
)
target_functions_rawDF["intBenchmarkClass"] = intListProblemSize
# Add the other problem-size-derived numeric columns.
target_functions_rawDF = addInitDataToRawDF(target_functions_rawDF)
# Response-variable column names.
res_var = ["functionCallNum"]
# Explanatory-variable column names: everything except identifiers AND the
# response. Leaving "functionCallNum" in this list lets the models regress
# the target on itself, which is what produced the 0.0 MAPE columns.
non_explanatory_columns = {
    "functionName",
    "benchmarkName",
    "benchmarkClass",
    *res_var,
}
exp_var = [
    column
    for column in target_functions_rawDF.columns.tolist()
    if column not in non_explanatory_columns
]
print(exp_var)
# One Series (row) per target function.
result_series_list = []
for target_function_name in target_function_names:
    # Raw data of this function only; copied so that downstream code that
    # adds columns does not write into a slice.
    target_function_rawDF = target_functions_rawDF[
        target_functions_rawDF["functionName"] == target_function_name
    ].copy()
    # Build and fit the models, then compute their MAPE.
    models = Models(
        inputDF=target_function_rawDF,
        expVarColNames=exp_var,
        resVarColNames=res_var,
        targetDF=None,
        modelNames=model_names_list,
    )
    models.setUpDataBeforeCalcLr()
    models.calcLr()
    models.calcMAPE()
    dictCalcedMAPE = models.returnCalculatedMAPE()
    # Truncate (not round) each MAPE to one decimal place, into a fresh dict
    # so the object returned by the model is left untouched.
    dict_for_series = {
        key: int(value * 10) / 10 for key, value in dictCalcedMAPE.items()
    }
    dict_for_series["functionName"] = target_function_name
    result_series_list.append(pd.Series(dict_for_series))
resultDF = pd.DataFrame(result_series_list)
resultDF = resultDF.rename(columns=dictColumnNamesJP)
print(
    resultDF.style.to_latex(
        caption=f"ベンチマークプログラム{benchmark_name.upper()}における特定の関数でのMAPE",
        # index=False,
        label=f"{date}bestModelCombinationIn{benchmark_name.upper()}inTargetFunctions",
    )
)
# df = returnDictAboutMAPETable(
# benchmarkName=benchmark_name,
# classes=classes,
# targetClass=None,
# processes=processes,
# targetProcess=None,
# expVar=expVar,
# csvDirPath=csvDirPath,
# modelNames=model_names_list,
# )
# df
/tmp/ipykernel_137/2464667175.py:57: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_137/3247450754.py:305: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_137/3247450754.py:305: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_137/3247450754.py:305: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_137/3247450754.py:305: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
['functionCallNum', 'process', 'intBenchmarkClass', 'na', 'nonzer', 'niter', 'shift']
\begin{table}
\caption{ベンチマークプログラムCGにおける特定の関数でのMAPE}
\label{2022年1月15日bestModelCombinationInCGinTargetFunctions}
\begin{tabular}{lrrrrrrrrrl}
& 線形 & 反比例 & 対数 & 線形&反比例 & 線形&対数 & 反比例&線形 & 反比例&対数 & 対数&線形 & 対数&反比例 & 関数名 \\
0 & 0.000000 & 5.300000 & 3.100000 & 5.000000 & 2.600000 & 0.000000 & 2.700000 & 0.000000 & 5.900000 & CONJ_GRAD \\
1 & 0.000000 & 308.300000 & 452.700000 & 435.000000 & 594.200000 & 0.000000 & 218.300000 & 0.000000 & 537.000000 & ICNVRT \\
2 & 0.000000 & 215.600000 & 170.200000 & 275.900000 & 225.300000 & 0.000000 & 95.600000 & 0.000000 & 310.900000 & SPRNVC \\
3 & 0.000000 & 215.600000 & 170.200000 & 275.900000 & 225.300000 & 0.000000 & 95.600000 & 0.000000 & 310.900000 & VECSET \\
\end{tabular}
\end{table}
| 関数名 | コア数固定での最適モデル | 問題サイズ固定での最適モデル |
|---|---|---|
| func_name | model_name | model_name |
上記のような表を作成する。
# For each benchmark, build the lowest-MAPE ("best") model per function under
# two conditions (problem size fixed / core count fixed) and print the
# side-by-side table as LaTeX.


def _best_model_frame(
    benchmark_name,
    classes,
    processes,
    column_names_jp,
    best_model_column,
    csv_dir_path,
):
    """Return one condition's MAPE table with its best-model column renamed.

    Args:
        benchmark_name: Benchmark identifier passed to the project helpers.
        classes: Problem-size classes to include.
        processes: Core counts to include.
        column_names_jp: Mapping applied to the column names of the MAPE
            table; it must map the function-name column to "関数名".
        best_model_column: Mapping that renames the best-model column added
            by ``addLowestMAPEsModelNameColumn``.
        csv_dir_path: Directory of the raw CSV files.

    Returns:
        Whatever ``addLowestMAPEsModelNameColumn`` returns, after the rename;
        presumably a DataFrame indexed by "関数名" — confirm in lib.ipynb.
    """
    exp_var = returnExplanatoryVariablesList(
        benchmarkName=benchmark_name,
        classes=classes,
        processes=processes,
        csvDirPath=csv_dir_path,
        baseExpVar=True,
        initExpVar=True,
    )
    mape_df = returnDictAboutMAPETable(
        benchmarkName=benchmark_name,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=exp_var,
        csvDirPath=csv_dir_path,
    )
    mape_df = mape_df.rename(columns=column_names_jp).set_index("関数名")
    # NOTE(review): called without ``version``; the recorded output shows the
    # helper warning that version=1 is outdated. Behaviour is kept as-is.
    return addLowestMAPEsModelNameColumn(mape_df).rename(
        columns=best_model_column
    )


resultsList = []
# Core count fixed at 128, problem size varied.
classes_fix_core = ["A", "B", "C", "D", "E", "F"]
processes_fix_core = [128]
# Problem size fixed at B, core count varied.
classes_fix_class = ["B"]
processes_fix_class = [2, 4, 8, 16, 32, 64, 128]
# Benchmark program names.
benchmark_names = ["cg"]
# benchmark_names = ["cg", "ep", "ft", "is", "lu", "mg"]
dictColumnNamesJP = {
    "functionName": "関数名",
    "modelLin": "線形モデル",
    "modelIp": "反比例モデル",
    "modelLog": "対数モデル",
}
model_names_list_jp = list(dictColumnNamesJP.values())
model_names_list_jp.remove("関数名")
dict_column_fix_core = {"最適モデル": "最適モデル(コア数固定)"}
dict_column_fix_class = {"最適モデル": "最適モデル(問題サイズ固定)"}
model_names_list = list(dictColumnNamesJP.keys())
model_names_list.remove("functionName")
date = "2022年1月10日"
# (until 2022-01-14) vary both core count and problem size. Not read in this
# cell; kept because they are notebook-level variables.
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
resultDict_fix_core = {}
resultDict_fix_class = {}
for benchmarkName in benchmark_names:
    print(f"% _____benchmarkName={benchmarkName}_____\n\n\n")
    # Problem size fixed, core count varied.
    result_fix_class = _best_model_frame(
        benchmark_name=benchmarkName,
        classes=classes_fix_class,
        processes=processes_fix_class,
        column_names_jp=dictColumnNamesJP,
        best_model_column=dict_column_fix_class,
        csv_dir_path=csvDirPath,
    )
    resultDict_fix_class[benchmarkName.upper()] = result_fix_class
    # Core count fixed, problem size varied.
    result_fix_core = _best_model_frame(
        benchmark_name=benchmarkName,
        classes=classes_fix_core,
        processes=processes_fix_core,
        column_names_jp=dictColumnNamesJP,
        best_model_column=dict_column_fix_core,
        csv_dir_path=csvDirPath,
    )
    resultDict_fix_core[benchmarkName.upper()] = result_fix_core
    # Drop the per-model MAPE columns and outer-join the two best-model
    # columns on the function-name index.
    print(
        pd.merge(
            result_fix_class.drop(model_names_list_jp, axis=1),
            result_fix_core.drop(model_names_list_jp, axis=1),
            how="outer",
            right_index=True,
            left_index=True,
        ).style.to_latex(
            caption=f"ベンチマークプログラム{benchmarkName.upper()}における最適モデル",
            label=f"{date}bestModelCombinationIn{benchmarkName.upper()}",
        )
    )
% _____benchmarkName=cg_____
WARNING:__main__:addLowestMAPEsModelNameColumn():version=1はすでに古い実装となっています。version=2の利用を検討してください。 WARNING:__main__:addLowestMAPEsModelNameColumn():version=1はすでに古い実装となっています。version=2の利用を検討してください。
\begin{table}
\caption{ベンチマークプログラムCGにおける最適モデル}
\label{2022年1月10日bestModelCombinationInCG}
\begin{tabular}{lll}
& 最適モデル(問題サイズ固定) & 最適モデル(コア数固定) \\
関数名 & & \\
.TAU_application & 線形モデル & 線形モデル \\
ALLOC_SPACE & 線形モデル & 線形モデル \\
CG & 線形モデル & 線形モデル \\
CONJ_GRAD & 反比例モデル & 線形モデル \\
ICNVRT & 反比例モデル & 線形モデル \\
INITIALIZE_MPI & 線形モデル & 線形モデル \\
MAKEA & 線形モデル & 線形モデル \\
MPI_Barrier() & 線形モデル & 線形モデル \\
MPI_Bcast() & 線形モデル & 線形モデル \\
MPI_Comm_rank() & 線形モデル & 線形モデル \\
MPI_Comm_size() & 線形モデル & 線形モデル \\
MPI_Finalize() & 線形モデル & 線形モデル \\
MPI_Init() & 線形モデル & 線形モデル \\
MPI_Irecv() & 対数モデル & 線形モデル \\
MPI_Reduce() & 線形モデル & 線形モデル \\
MPI_Send() & 対数モデル & 線形モデル \\
MPI_Wait() & 対数モデル & 線形モデル \\
SETUP_PROC_INFO & 線形モデル & 線形モデル \\
SETUP_SUBMATRIX_INFO & 線形モデル & 線形モデル \\
SPARSE & 線形モデル & 線形モデル \\
SPRNVC & 反比例モデル & 線形モデル \\
VECSET & 反比例モデル & 線形モデル \\
\end{tabular}
\end{table}
# Print, for each benchmark, the MAPE of every single and combined model
# (core count and problem size both varied) plus the best-model column.
resultsDict = {}
# Both the problem size and the core count are varied in this cell.
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128]
# The list of benchmark names (benchmark_names) comes from an earlier cell.
# benchmark_names = ["cg", "ep", "ft", "is", "lu", "mg"]
dictColumnNamesJP = {
    "functionName": "関数名",
    "modelLin": "線形",
    "modelIp": "反比例",
    "modelLog": "対数",
    "modelLinAndIp": "線形&反比例",
    "modelLinAndLog": "線形&対数",
    "modelIpAndLin": "反比例&線形",
    "modelIpAndLog": "反比例&対数",
    "modelLogAndLin": "対数&線形",
    "modelLogAndIp": "対数&反比例",
}
# Display names and internal names of the models (function-name entry removed).
model_names_list_jp = list(dictColumnNamesJP.values())
model_names_list_jp.remove("関数名")
model_names_list = list(dictColumnNamesJP.keys())
model_names_list.remove("functionName")
date = "2022年1月10日"
for benchmarkName in benchmark_names:
    print(f"% _____benchmarkName={benchmarkName}_____\n\n\n")
    expVar = returnExplanatoryVariablesList(
        benchmarkName=benchmarkName,
        classes=classes,
        processes=processes,
        csvDirPath=csvDirPath,
        baseExpVar=True,
        initExpVar=True,
    )
    print(f"expVar={expVar}")
    df = returnDictAboutMAPETable(
        benchmarkName=benchmarkName,
        classes=classes,
        targetClass=None,
        processes=processes,
        targetProcess=None,
        expVar=expVar,
        csvDirPath=csvDirPath,
        modelNames=model_names_list,
    )
    # Japanese column names, indexed by function name.
    df = df.rename(columns=dictColumnNamesJP).set_index("関数名")
    result = addLowestMAPEsModelNameColumn(df)
    resultsDict[benchmarkName.upper()] = result
    latex_table = result.style.to_latex(
        caption=f"ベンチマークプログラム{benchmarkName.upper()}における各モデルでのMAPE",
        label=f"{date}allModelsMAPEin{benchmarkName.upper()}",
    )
    print(latex_table)
# print(
# pd.merge(
# result_fix_class.drop(model_names_list_jp, axis=1),
# result_fix_core.drop(model_names_list_jp, axis=1),
# how="outer",
# right_index=True,
# left_index=True,
# ).style.to_latex(caption=f"ベンチマークプログラム{benchmarkName.upper()}における最適モデル", label="bestModelCombinationInCG")
# )
% _____benchmarkName=cg_____ expVar=['process', 'intBenchmarkClass', 'na', 'nonzer', 'niter', 'shift']
WARNING:__main__:addLowestMAPEsModelNameColumn():version=1はすでに古い実装となっています。version=2の利用を検討してください。
\begin{table}
\caption{ベンチマークプログラムCGにおける各モデルでのMAPE}
\label{2022年1月10日allModelsMAPEinCG}
\begin{tabular}{lrrrrrrrrrl}
& 線形 & 反比例 & 対数 & 線形&反比例 & 線形&対数 & 反比例&線形 & 反比例&対数 & 対数&線形 & 対数&反比例 & 最適モデル \\
関数名 & & & & & & & & & & \\
.TAU_application & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
ALLOC_SPACE & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
CG & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
CONJ_GRAD & 14.400000 & 15.500000 & 16.000000 & 14.400000 & 14.400000 & 15.500000 & 15.500000 & 16.000000 & 16.000000 & 線形 \\
ICNVRT & 471.800000 & 727.800000 & 768.900000 & 471.800000 & 471.800000 & 727.800000 & 727.800000 & 768.900000 & 768.900000 & 線形 \\
INITIALIZE_MPI & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MAKEA & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Barrier() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Bcast() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Comm_rank() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Comm_size() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Finalize() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Init() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Irecv() & 35.800000 & 38.500000 & 36.600000 & 35.800000 & 35.800000 & 38.500000 & 38.500000 & 36.600000 & 36.600000 & 線形 \\
MPI_Reduce() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Send() & 35.800000 & 38.500000 & 36.600000 & 35.800000 & 35.800000 & 38.500000 & 38.500000 & 36.600000 & 36.600000 & 線形 \\
MPI_Wait() & 35.800000 & 38.500000 & 36.600000 & 35.800000 & 35.800000 & 38.500000 & 38.500000 & 36.600000 & 36.600000 & 線形 \\
SETUP_PROC_INFO & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SETUP_SUBMATRIX_INFO & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SPARSE & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SPRNVC & 238.600000 & 340.800000 & 365.600000 & 238.600000 & 238.600000 & 340.800000 & 340.800000 & 365.600000 & 365.600000 & 線形 \\
VECSET & 238.600000 & 340.800000 & 365.600000 & 238.600000 & 238.600000 & 340.800000 & 340.800000 & 365.600000 & 365.600000 & 線形 \\
\end{tabular}
\end{table}
%reset -f
# ipynb形式のライブラリのインポート
%run ./lib/lib.ipynb
# 生データの入ったCSVファイルの保持されたディレクトリ名を格納している変数
csvDirPath = "./csv_files/"
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
# TODO: 👆やる
# Synthetic data for trying out a fit of T = a*X / (b*Y + c*Z) + d.
# Explanatory variables.
# NOTE: the three arrays are identical, so X / (b*Y + c*Z) is the same for
# every sample and plotT below is constant (10*x / (20*x + 30*x) + 40 = 40.2).
plotX = np.linspace(10, 20, 10)
plotY = np.linspace(10, 20, 10)
plotZ = np.linspace(10, 20, 10)
# Response variable: true coefficients (random draws kept for reference).
# a = random.uniform(1, 30)
# b = random.uniform(1, 30)
# c = random.uniform(1, 30)
# d = random.uniform(1, 30)
a = 10
b = 20
c = 30
d = 40
print(f"a = {a}, b = {b}, c = {c}, d = {d}")
plotT = (a * plotX) / (b * plotY + c * plotZ) + d
# Build the DataFrame.
# Column names; the frame is built row-wise and transposed so each name
# becomes a column.
columnNames = ["plotX", "plotY", "plotZ", "plotT"]
datumForDF = [plotX, plotY, plotZ, plotT]
inputDFForTest = pd.DataFrame(index=columnNames, data=datumForDF).T
inputDFForTest["functionName"] = "functionName"
# Column-name lists for the explanatory and response variables.
# Explanatory-variable column names.
columnNamesForExp = ["plotX", "plotY", "plotZ"]
# Response-variable column names.
columnNamesForRes = ["plotT"]
def target_func(inputsList, a, b, c, d):
    """Evaluate ``a*X / (b*Y + c*Z) + d``.

    Args:
        inputsList: Indexable holding X, Y and Z at positions 0, 1 and 2
            (scalars or arrays that support arithmetic).
        a: Coefficient of the numerator term X.
        b: Coefficient of the denominator term Y.
        c: Coefficient of the denominator term Z.
        d: Additive intercept.

    Returns:
        The model value, with the same shape as the inputs.
    """
    numerator = a * inputsList[0]
    denominator = b * inputsList[1] + c * inputsList[2]
    return numerator / denominator + d
from scipy.optimize import curve_fit

# Show the model evaluated with the true coefficients. The label reports the
# coefficients actually passed (the previous text hard-coded "a = 2, ...").
true_model_values = target_func(
    inputsList=[plotX, plotY, plotZ], a=a, b=b, c=c, d=d
)
print(
    f"target_func(inputsList = [plotX, plotY, plotZ], a = {a}, b = {b}, c = {c}, d = {d}) = {true_model_values}"
)
# Least-squares fit of (a, b, c, d).
# NOTE: scaling a, b and c by a common factor leaves the model value
# unchanged, so the fitted coefficients are not unique and need not match
# the true ones even when the predictions do.
popt, pcov = curve_fit(target_func, [plotX, plotY, plotZ], plotT)
print(f"popt = {popt}, pcov = {pcov}")
a = 10, b = 20, c = 30, d = 40 target_func(inputsList = [plotX, plotY, plotZ], a = 2, b = 2, c = 2, d = 2) = [40.2 40.2 40.2 40.2 40.2 40.2 40.2 40.2 40.2 40.2] popt = [97.00000145 1.00000002 0.99999999 -8.30000063], pcov = [[-3.82831018e-14 -2.20546950e-16 -2.78576182e-16 7.03781513e-15] [-2.00275526e-16 1.09243360e-17 -1.19276412e-17 7.58076160e-17] [-2.96059473e-16 -1.23778908e-17 7.70988212e-18 3.48305263e-17] [ 7.10542736e-15 7.50247747e-17 3.70074342e-17 -8.35932630e-16]]
print(f"type(popt)={type(popt)}, type(pcov)={type(pcov)}")
type(popt)=<class 'numpy.ndarray'>, type(pcov)=<class 'numpy.ndarray'>
# Evaluate the model with the four fitted coefficients (a, b, c, d in popt
# order) and compare against the data it was fitted to.
fitted_a, fitted_b, fitted_c, fitted_d = popt[0], popt[1], popt[2], popt[3]
predictedPlotT = target_func(
    inputsList=[plotX, plotY, plotZ],
    a=fitted_a,
    b=fitted_b,
    c=fitted_c,
    d=fitted_d,
)
print(f"realData = {plotT}\npredict = {predictedPlotT}")
realData = [40.2 40.2 40.2 40.2 40.2 40.2 40.2 40.2 40.2 40.2] predict = [40.2 40.2 40.2 40.2 40.2 40.2 40.2 40.2 40.2 40.2]
%reset -f
# ipynb形式のライブラリのインポート
%run ./lib/lib.ipynb
# 生データの入ったCSVファイルの保持されたディレクトリ名を格納している変数
csvDirPath = "./csv_files/"
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
L = [1, 3, 5]
print(L)
print(L[0], L[1], L[2])
[1, 3, 5] 1 3 5
print(*L)
1 3 5
def foo(i, j, k):
    """Print the three arguments on one labelled line (argument-unpacking demo)."""
    message = f"i = {i}, j = {j}、k = {k}"
    print(message)
l = ["Hello", "World", "Python"]
foo(*L)
i = 1, j = 3、k = 5
print(*range(0, 5))
0 1 2 3 4
import numpy as np
def foo(
    list_input: list[list[float]], *list_coef, float_intercept: float
) -> list[float]:
    """Evaluate ``c0*x0 / (c1*x1 + ... + cn*xn) + intercept`` per sample.

    Args:
        list_input: One list per variable, all of the same length; the first
            variable forms the numerator, the remaining ones the denominator.
        *list_coef: One coefficient per variable, in the same order.
        float_intercept: Value added to every quotient.

    Returns:
        One value per sample (i.e. per position inside the variable lists).

    Raises:
        ValueError: If the number of coefficients differs from the number of
            variables, if fewer than two variables are given, or if the
            variables do not all have the same number of samples.
        ZeroDivisionError: If the denominator of a sample is zero.
    """
    if len(list_input) != len(list_coef):
        raise ValueError(
            f"expected {len(list_input)} coefficients, got {len(list_coef)}"
        )
    if len(list_input) < 2:
        raise ValueError("need one numerator and at least one denominator variable")
    sample_count = len(list_input[0])
    if any(len(values) != sample_count for values in list_input):
        raise ValueError("all variables must have the same number of samples")
    numerator_coef, *denominator_coefs = list_coef
    list_return = []
    # zip(*...) walks the samples; each tuple holds one value per variable.
    for numerator_value, *denominator_values in zip(*list_input):
        numerator = numerator_coef * numerator_value
        denominator = sum(
            coef * value
            for coef, value in zip(denominator_coefs, denominator_values)
        )
        list_return.append(numerator / denominator + float_intercept)
    return list_return
def test_foo():
    """Scaffold for a test of ``foo``: it only prepares fixtures.

    Nothing is asserted and ``foo`` is not called.
    """
    # Coefficients, one per input variable.
    coef_a, coef_b, coef_c, coef_d, coef_e = 10, 20, 30, 40, 50
    # Intercept.
    inter = -5
    # Input variables: four samples each, filled with 1 .. 5 respectively.
    list_a, list_b, list_c, list_d, list_e = (
        [value] * 4 for value in range(1, 6)
    )
    # Three-variable case.
    list_input = [list_a, list_b, list_c]
    # Five-variable case (replaces the three-variable input above).
    list_input = [list_a, list_b, list_c, list_d, list_e]
test_foo()
import numpy as np
def foo(
    list_input: list[list[float]], *list_coef, float_intercept: float
) -> list[float]:
    """Evaluate ``c0*x0 / (c1*x1 + ... + cn*xn) + intercept`` per sample.

    Args:
        list_input: One list per variable, all of the same length; the first
            variable forms the numerator, the remaining ones the denominator.
        *list_coef: One coefficient per variable, in the same order.
        float_intercept: Value added to every quotient.

    Returns:
        One value per sample (i.e. per position inside the variable lists).

    Raises:
        ValueError: If the number of coefficients differs from the number of
            variables, if fewer than two variables are given, or if the
            variables do not all have the same number of samples.
        ZeroDivisionError: If the denominator of a sample is zero.
    """
    if len(list_input) != len(list_coef):
        raise ValueError(
            f"expected {len(list_input)} coefficients, got {len(list_coef)}"
        )
    if len(list_input) < 2:
        raise ValueError("need one numerator and at least one denominator variable")
    sample_count = len(list_input[0])
    if any(len(values) != sample_count for values in list_input):
        raise ValueError("all variables must have the same number of samples")
    numerator_coef, *denominator_coefs = list_coef
    list_return = []
    # zip(*...) walks the samples; each tuple holds one value per variable.
    for numerator_value, *denominator_values in zip(*list_input):
        numerator = numerator_coef * numerator_value
        denominator = sum(
            coef * value
            for coef, value in zip(denominator_coefs, denominator_values)
        )
        list_return.append(numerator / denominator + float_intercept)
    return list_return


def test_foo():
    """Check ``foo`` against element-wise expectations for 3 and 5 variables."""
    # Coefficients.
    coef_a = 10
    coef_b = 20
    coef_c = 30
    coef_d = 40
    coef_e = 50
    # Intercept.
    inter = -5
    # Input variables (four samples each).
    list_a = [1, 1, 1, 1]
    list_b = [2, 2, 2, 2]
    list_c = [3, 3, 3, 3]
    list_d = [4, 4, 4, 4]
    list_e = [5, 5, 5, 5]
    # Three variables. The expectation is computed element-wise; ``int * list``
    # would repeat the list instead of scaling its elements.
    list_input: list[list[float]] = [list_a, list_b, list_c]
    list_expect_result: list[float] = [
        coef_a * a / (coef_b * b + coef_c * c) + inter
        for a, b, c in zip(list_a, list_b, list_c)
    ]
    # list_input is the first positional parameter; coefficients follow it.
    list_actual_result = foo(
        list_input, coef_a, coef_b, coef_c, float_intercept=inter
    )
    assert list_expect_result == list_actual_result
    # Five variables.
    list_input = [list_a, list_b, list_c, list_d, list_e]
    list_expect_result = [
        coef_a * a / (coef_b * b + coef_c * c + coef_d * d + coef_e * e) + inter
        for a, b, c, d, e in zip(list_a, list_b, list_c, list_d, list_e)
    ]
    list_actual_result = foo(
        list_input,
        coef_a,
        coef_b,
        coef_c,
        coef_d,
        coef_e,
        float_intercept=inter,
    )
    assert list_expect_result == list_actual_result
# test_foo()
%reset -f
# ipynb形式のライブラリのインポート
%run ./lib/lib.ipynb
# 生データの入ったCSVファイルの保持されたディレクトリ名を格納している変数
csvDirPath = "./csv_files/"
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
# Load the raw CG data (problem size and core count both varied) and show the
# rows of the CONJ_GRAD function.
benchmark_name = "cg"
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Internal column/model names -> Japanese display names.
dict_column_names_JP = {
    "functionName": "関数名",
    "modelLin": "線形",
    "modelIp": "反比例",
    "modelLog": "対数",
    "modelLinAndIp": "線形&反比例",
    "modelLinAndLog": "線形&対数",
    "modelIpAndLin": "反比例&線形",
    "modelIpAndLog": "反比例&対数",
    "modelLogAndLin": "対数&線形",
    "modelLogAndIp": "対数&反比例",
}
model_names_list_JP = list(dict_column_names_JP.values())
model_names_list_JP.remove("関数名")
model_names_list = list(dict_column_names_JP.keys())
model_names_list.remove("functionName")
date = "2022年2月2日"
# Vary both the core count and the problem size.
rawDF = return_rawDF_with_init_param(
    benchmark_name=benchmark_name,
    classes=classes,
    processes=processes,
    csv_dir_path="./csv_files/",
)
# Explanatory variables: every column except the identifiers, the response
# and the numeric class id. list.remove raises ValueError if a column is
# missing, so an unexpected frame layout fails loudly here.
exp_var = rawDF.columns.tolist()
for element_be_removed in [
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
]:
    exp_var.remove(element_be_removed)
res_var = ["functionCallNum"]
rawDF_CONJ_GRAD = rawDF[rawDF["functionName"] == "CONJ_GRAD"]
# Bare expression: displayed by the notebook as the cell result.
rawDF_CONJ_GRAD
# returned_MAPE_table = return_MAPE_Table_DF_from_rawDF(
# rawDF=rawDF,
# exp_var_list=exp_var,
# res_var_list=res_var,
# model_name_list=model_names_list,
# )
# df = returned_MAPE_table.rename(columns=dict_column_names_JP)
# df = df.set_index("関数名")
# df = df.sort_index(axis=0)
# df = df.sort_index(axis=1)
# df = addLowestMAPEsModelNameColumn(df, version=2, model_name_list=model_names_list_JP)
# print(
# df.to_latex(
# caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
# label=f"{date}everyModelsMAPEtableIn{benchmark_name.upper()}",
# )
# )
| functionName | functionCallNum | benchmarkName | benchmarkClass | process | intBenchmarkClass | na | nonzer | niter | shift | |
|---|---|---|---|---|---|---|---|---|---|---|
| 5 | CONJ_GRAD | 16.0 | cg | A | 2 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 4 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 8 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 16 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 32 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 64 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 128 | 1 | 14000 | 11 | 15 | 20 |
| 6 | CONJ_GRAD | 16.0 | cg | A | 256 | 1 | 14000 | 11 | 15 | 20 |
| 2 | CONJ_GRAD | 31.0 | cg | B | 2 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 46.0 | cg | B | 4 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 8 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 16 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 32 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 64 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 128 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 256 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 2 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 4 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 8 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 16 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 32 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 64 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 128 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 256 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | D | 2 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 76.0 | cg | D | 4 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 76.0 | cg | D | 8 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 16 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 32 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 64 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 128 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 256 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 32 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 64 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 128 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 256 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | F | 256 | 65536 | 54000000 | 26 | 100 | 1500 |
rawDF
| functionName | functionCallNum | benchmarkName | benchmarkClass | process | intBenchmarkClass | na | nonzer | niter | shift | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | .TAU_application | 1.0 | cg | A | 2 | 1 | 14000 | 11 | 15 | 20 |
| 1 | CG | 1.0 | cg | A | 2 | 1 | 14000 | 11 | 15 | 20 |
| 2 | MPI_Finalize() | 1.0 | cg | A | 2 | 1 | 14000 | 11 | 15 | 20 |
| 3 | INITIALIZE_MPI | 1.0 | cg | A | 2 | 1 | 14000 | 11 | 15 | 20 |
| 4 | MPI_Init() | 1.0 | cg | A | 2 | 1 | 14000 | 11 | 15 | 20 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17 | MPI_Reduce() | 1.0 | cg | F | 256 | 65536 | 54000000 | 26 | 100 | 1500 |
| 18 | SETUP_SUBMATRIX_INFO | 1.0 | cg | F | 256 | 65536 | 54000000 | 26 | 100 | 1500 |
| 19 | MPI_Comm_size() | 1.0 | cg | F | 256 | 65536 | 54000000 | 26 | 100 | 1500 |
| 20 | MPI_Comm_rank() | 1.0 | cg | F | 256 | 65536 | 54000000 | 26 | 100 | 1500 |
| 21 | SETUP_PROC_INFO | 1.0 | cg | F | 256 | 65536 | 54000000 | 26 | 100 | 1500 |
814 rows × 10 columns
rawDF_CONJ_GRAD = rawDF[rawDF["functionName"] == "CONJ_GRAD"]
rawDF_CONJ_GRAD
| functionName | functionCallNum | benchmarkName | benchmarkClass | process | intBenchmarkClass | na | nonzer | niter | shift | |
|---|---|---|---|---|---|---|---|---|---|---|
| 5 | CONJ_GRAD | 16.0 | cg | A | 2 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 4 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 8 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 16 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 32 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 64 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 128 | 1 | 14000 | 11 | 15 | 20 |
| 6 | CONJ_GRAD | 16.0 | cg | A | 256 | 1 | 14000 | 11 | 15 | 20 |
| 2 | CONJ_GRAD | 31.0 | cg | B | 2 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 46.0 | cg | B | 4 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 8 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 16 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 32 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 64 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 128 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 256 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 2 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 4 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 8 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 16 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 32 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 64 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 128 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 256 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | D | 2 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 76.0 | cg | D | 4 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 76.0 | cg | D | 8 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 16 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 32 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 64 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 128 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 256 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 32 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 64 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 128 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 256 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | F | 256 | 65536 | 54000000 | 26 | 100 | 1500 |
print(f"exp_var = {exp_var}")
print(f"res_var = {res_var}")
print(f"rawDF_CONJ_GRAD.shape = {rawDF_CONJ_GRAD.shape}")
exp_var = ['process', 'na', 'nonzer', 'niter', 'shift'] res_var = ['functionCallNum'] rawDF_CONJ_GRAD.shape = (37, 10)
# Split the CONJ_GRAD rows into explanatory (X) and response (Y) frames.
X = rawDF_CONJ_GRAD.filter(items=exp_var)
Y = rawDF_CONJ_GRAD.filter(items=res_var)
# Hold out 10% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.1, random_state=1234
)
# X_train, X_test, Y_train, Y_test
# Fit a regression tree limited to at most 20 leaves.
# NOTE(review): DecisionTreeRegressor is not among the imports visible at the
# top of the file; presumably it is provided by ./lib/lib.ipynb — confirm.
reg = DecisionTreeRegressor(max_leaf_nodes=20)
model = reg.fit(X_train, Y_train)
print(f"model = {model}")
model = DecisionTreeRegressor(max_leaf_nodes=20)
# Score the fitted tree on the held-out rows with the coefficient of
# determination (R^2).
YHat = model.predict(X_test)
r2 = r2_score(Y_test, YHat)
print("R^2 = ", r2)
R^2 = 0.9516129032258065
# Render the fitted regression tree as a PNG via Graphviz.
# The split nodes are labelled with the training-frame column names. The
# earlier attempt used ``rawDF_CONJ_GRAD.feature_names``, which a DataFrame
# does not have. ``class_names`` is dropped because it only applies to
# classifiers, and ``model`` is a regressor.
dot_data = tree.export_graphviz(
    model,
    out_file=None,
    feature_names=list(X_train.columns),
    filled=True,
)
graph = pydotplus.graph_from_dot_data(dot_data)
# Bare expression: displayed by the notebook as the cell result.
Image(graph.create_png())
%reset -f
# ipynb形式のライブラリのインポート
%run ./lib/lib.ipynb
# 生データの入ったCSVファイルの保持されたディレクトリ名を格納している変数
csvDirPath = "./csv_files/"
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
# Compute the MAPE of every candidate model for the CG benchmark (core count
# and problem size both varied) and print the table, including the
# best-model column, as LaTeX.
benchmark_name = "cg"
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
# Internal column/model names -> Japanese display names.
dict_column_names_JP = {
    "functionName": "関数名",
    "modelLin": "線形",
    "modelIp": "反比例",
    "modelLog": "対数",
    "modelLinAndIp": "線形&反比例",
    "modelLinAndLog": "線形&対数",
    "modelIpAndLin": "反比例&線形",
    "modelIpAndLog": "反比例&対数",
    "modelLogAndLin": "対数&線形",
    "modelLogAndIp": "対数&反比例",
    "modelProcessDividedByProblemSize": "プロセス数を問題サイズで割る",
}
# Model names only: the function-name entry is not a model.
model_names_list_JP = [
    display_name
    for display_name in dict_column_names_JP.values()
    if display_name != "関数名"
]
model_names_list = [
    internal_name
    for internal_name in dict_column_names_JP
    if internal_name != "functionName"
]
date = "2022年3月9日"
# Vary both the core count and the problem size.
rawDF = return_rawDF_with_init_param(
    benchmark_name=benchmark_name,
    classes=classes,
    processes=processes,
    csv_dir_path="./csv_files/",
)
# Explanatory variables: all columns minus identifiers, the response and the
# numeric class id (a missing column raises ValueError).
exp_var = rawDF.columns.tolist()
for element_be_removed in (
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
):
    exp_var.remove(element_be_removed)
res_var = ["functionCallNum"]
print(f"exp_var = {exp_var}")
returned_MAPE_table = return_MAPE_Table_DF_from_rawDF(
    rawDF=rawDF,
    exp_var_list=exp_var,
    res_var_list=res_var,
    model_name_list=model_names_list,
)
# Japanese column names, indexed by function name, rows then columns sorted.
df = (
    returned_MAPE_table.rename(columns=dict_column_names_JP)
    .set_index("関数名")
    .sort_index(axis=0)
    .sort_index(axis=1)
)
df = addLowestMAPEsModelNameColumn(
    df, version=2, model_name_list=model_names_list_JP
)
latex_table = df.style.to_latex(
    caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
    label=f"{date}allModelsMAPEin{benchmark_name.upper()}",
)
print(latex_table)
exp_var = ['process', 'na', 'nonzer', 'niter', 'shift']
/usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could 
not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated
\begin{table}
\caption{ベンチマークプログラムCGにおける各モデルでのMAPE}
\label{2022年3月9日allModelsMAPEinCG}
\begin{tabular}{lrrrrrrrrrrl}
& プロセス数を問題サイズで割る & 反比例 & 反比例&対数 & 反比例&線形 & 対数 & 対数&反比例 & 対数&線形 & 線形 & 線形&反比例 & 線形&対数 & 最適モデル \\
関数名 & & & & & & & & & & & \\
.TAU_application & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
ALLOC_SPACE & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
CG & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
CONJ_GRAD & 37.550135 & 13.876670 & 13.732936 & 14.498431 & 14.804389 & 14.717362 & 15.248570 & 13.161174 & 12.412559 & 12.755412 & 線形&反比例 \\
ICNVRT & 159.614199 & 662.481358 & 2504.274677 & 894.574028 & 2443.699386 & 3811.490854 & 815.789372 & 502.123230 & 6621.011052 & 2283.060136 & プロセス数を問題サイズで割る \\
INITIALIZE_MPI & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MAKEA & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Barrier() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Bcast() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Comm_rank() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Comm_size() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Finalize() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Init() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Irecv() & 208.955855 & 38.629896 & 38.552817 & 39.029932 & 35.489902 & 35.487038 & 35.954193 & 34.989183 & 35.515528 & 35.660459 & 線形 \\
MPI_Reduce() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Send() & 208.955855 & 38.629896 & 38.552817 & 39.029932 & 35.489902 & 35.487038 & 35.954193 & 34.989183 & 35.515528 & 35.660459 & 線形 \\
MPI_Wait() & 208.955855 & 38.629896 & 38.552817 & 39.029932 & 35.489902 & 35.487038 & 35.954193 & 34.989183 & 35.515528 & 35.660459 & 線形 \\
SETUP_PROC_INFO & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SETUP_SUBMATRIX_INFO & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SPARSE & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SPRNVC & 153.544935 & 336.648986 & 1321.895548 & 345.739861 & 1306.978016 & 1503.097645 & 338.951990 & 198.005854 & 2573.964383 & 1221.597908 & プロセス数を問題サイズで割る \\
VECSET & 153.544935 & 336.648986 & 1321.895548 & 345.739861 & 1306.978016 & 1503.097645 & 338.951990 & 198.005854 & 2573.964383 & 1221.597908 & プロセス数を問題サイズで割る \\
\end{tabular}
\end{table}
# Clear every user-defined name from the namespace without asking for confirmation.
%reset -f
# Import the shared library, which is kept in ipynb (notebook) form.
%run ./lib/lib.ipynb
# Directory that holds the CSV files containing the raw data.
csvDirPath = "./csv_files/"
# Short names of the benchmarks.
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
# Benchmark under study and the (problem class, process count) grid to load.
benchmark_name = "cg"
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]

# Internal column key -> Japanese heading (only used by the disabled block below).
dict_column_names_JP = {
    "functionName": "関数名",
    "modelLin": "線形",
    "modelIp": "反比例",
    "modelLog": "対数",
    "modelLinAndIp": "線形&反比例",
    "modelLinAndLog": "線形&対数",
    "modelIpAndLin": "反比例&線形",
    "modelIpAndLog": "反比例&対数",
    "modelLogAndLin": "対数&線形",
    "modelLogAndIp": "対数&反比例",
}
# Every key other than the function-name column identifies a model.
model_names_list = [key for key in dict_column_names_JP if key != "functionName"]
model_names_list_JP = [dict_column_names_JP[key] for key in model_names_list]
date = "2022年2月2日"

# Load the raw data, varying both the core count and the problem size.
rawDF = return_rawDF_with_init_param(
    benchmark_name=benchmark_name,
    classes=classes,
    processes=processes,
    csv_dir_path="./csv_files/",
)

# Explanatory variables: whatever columns remain once the identifier columns
# and the response column have been removed.
exp_var = rawDF.columns.tolist()
for element_be_removed in (
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
):
    exp_var.remove(element_be_removed)
res_var = ["functionCallNum"]

# Keep only the rows of function CONJ_GRAD and show them as the cell output.
rawDF_CONJ_GRAD = rawDF[rawDF["functionName"].eq("CONJ_GRAD")]
rawDF_CONJ_GRAD

# Disabled in this cell: MAPE table generation and LaTeX export.
# returned_MAPE_table = return_MAPE_Table_DF_from_rawDF(
#     rawDF=rawDF,
#     exp_var_list=exp_var,
#     res_var_list=res_var,
#     model_name_list=model_names_list,
# )
# df = returned_MAPE_table.rename(columns=dict_column_names_JP)
# df = df.set_index("関数名")
# df = df.sort_index(axis=0)
# df = df.sort_index(axis=1)
# df = addLowestMAPEsModelNameColumn(df, version=2, model_name_list=model_names_list_JP)
# print(
#     df.to_latex(
#         caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
#         label=f"{date}everyModelsMAPEtableIn{benchmark_name.upper()}",
#     )
# )
| | functionName | functionCallNum | benchmarkName | benchmarkClass | process | intBenchmarkClass | na | nonzer | niter | shift |
|---|---|---|---|---|---|---|---|---|---|---|
| 5 | CONJ_GRAD | 16.0 | cg | A | 2 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 4 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 8 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 16 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 32 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 64 | 1 | 14000 | 11 | 15 | 20 |
| 5 | CONJ_GRAD | 16.0 | cg | A | 128 | 1 | 14000 | 11 | 15 | 20 |
| 6 | CONJ_GRAD | 16.0 | cg | A | 256 | 1 | 14000 | 11 | 15 | 20 |
| 2 | CONJ_GRAD | 31.0 | cg | B | 2 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 46.0 | cg | B | 4 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 8 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 16 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 32 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 64 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 128 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | B | 256 | 4 | 75000 | 13 | 75 | 60 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 2 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 4 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 8 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 16 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 32 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 64 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 128 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | C | 256 | 16 | 150000 | 15 | 75 | 110 |
| 2 | CONJ_GRAD | 76.0 | cg | D | 2 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 76.0 | cg | D | 4 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 76.0 | cg | D | 8 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 16 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 32 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 64 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 128 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | D | 256 | 256 | 1500000 | 21 | 100 | 500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 32 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 64 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 128 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | E | 256 | 4096 | 9000000 | 26 | 100 | 1500 |
| 2 | CONJ_GRAD | 101.0 | cg | F | 256 | 65536 | 54000000 | 26 | 100 | 1500 |
# Clear every user-defined name from the namespace without asking for confirmation.
%reset -f
# Import the shared library, which is kept in ipynb (notebook) form.
%run ./lib/lib.ipynb
# Directory that holds the CSV files containing the raw data.
csvDirPath = "./csv_files/"
# Short names of the benchmarks.
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
# Compare the linear-family models on function CONJ_GRAD only, printing the MAPE
# values with 15 decimals so that near-identical scores can be told apart.
benchmark_name = "cg"
classes = ["A", "B", "C", "D"]
processes = [2, 4, 8, 16, 32, 64, 128]

# Internal column key -> Japanese heading; commented-out entries are models
# excluded from this comparison.
dict_column_names_JP = {
    "functionName": "関数名",
    "modelLin": "線形",
    # "modelIp": "反比例",
    # "modelLog": "対数",
    "modelLinAndIp": "線形&反比例",
    "modelLinAndLog": "線形&対数",
    # "modelIpAndLin": "反比例&線形",
    # "modelIpAndLog": "反比例&対数",
    # "modelLogAndLin": "対数&線形",
    # "modelLogAndIp": "対数&反比例",
    # "modelProcessDividedByProblemSize": "プロセス数を問題サイズで割る"
}
model_names_list_JP = list(dict_column_names_JP.values())
model_names_list_JP.remove("関数名")
model_names_list = list(dict_column_names_JP.keys())
model_names_list.remove("functionName")
date = "2022年3月16日"

# Load the raw data, varying both the core count and the problem size,
# then keep only the rows of function CONJ_GRAD.
rawDF = return_rawDF_with_init_param(
    benchmark_name=benchmark_name,
    classes=classes,
    processes=processes,
    csv_dir_path="./csv_files/",
)
rawDF = rawDF[rawDF["functionName"] == "CONJ_GRAD"]

# Explanatory variables: whatever columns remain once the identifier columns
# and the response column have been removed.
exp_var = rawDF.columns.tolist()
for element_be_removed in [
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
]:
    exp_var.remove(element_be_removed)
res_var = ["functionCallNum"]

returned_MAPE_table = return_MAPE_Table_DF_from_rawDF(
    rawDF=rawDF,
    exp_var_list=exp_var,
    res_var_list=res_var,
    model_name_list=model_names_list,
)
df = returned_MAPE_table.rename(columns=dict_column_names_JP)
df = df.set_index("関数名")
df = df.sort_index(axis=0)
df = df.sort_index(axis=1)
df = addLowestMAPEsModelNameColumn(df, version=2, model_name_list=model_names_list_JP)

# Raise the display precision only for these two prints. option_context restores
# the previous setting even if printing raises, instead of forcing it back to a
# hard-coded 6 afterwards.
with pd.option_context("display.precision", 15):
    print(df)
    print(
        df.style.to_latex(
            caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
            label=f"{date}allModelsMAPEin{benchmark_name.upper()}",
        )
    )
線形 線形&反比例 線形&対数 最適モデル
関数名
CONJ_GRAD 16.051673712330807 16.051673714205847 16.051673714205901 線形
\begin{table}
\caption{ベンチマークプログラムCGにおける各モデルでのMAPE}
\label{2022年3月16日allModelsMAPEinCG}
\begin{tabular}{lrrrl}
& 線形 & 線形&反比例 & 線形&対数 & 最適モデル \\
関数名 & & & & \\
CONJ_GRAD & 16.051674 & 16.051674 & 16.051674 & 線形 \\
\end{tabular}
\end{table}
# Clear every user-defined name from the namespace without asking for confirmation.
%reset -f
# Import the shared library, which is kept in ipynb (notebook) form.
%run ./lib/lib.ipynb
# Directory that holds the CSV files containing the raw data.
csvDirPath = "./csv_files/"
# Short names of the benchmarks.
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
# Benchmark under study and the (problem class, process count) grid to load.
benchmark_name = "cg"
classes = ["A", "B", "C", "D", "E", "F"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]

# Internal column key -> Japanese heading; this run also includes the
# regression-tree model ("modelBasicTree").
dict_column_names_JP = {
    "functionName": "関数名",
    "modelLin": "線形",
    "modelIp": "反比例",
    "modelLog": "対数",
    "modelLinAndIp": "線形&反比例",
    "modelLinAndLog": "線形&対数",
    "modelIpAndLin": "反比例&線形",
    "modelIpAndLog": "反比例&対数",
    "modelLogAndLin": "対数&線形",
    "modelLogAndIp": "対数&反比例",
    "modelProcessDividedByProblemSize": "プロセス数を問題サイズで割る",
    "modelBasicTree": "回帰木",
}
# Every key other than the function-name column identifies a model.
model_names_list = [key for key in dict_column_names_JP if key != "functionName"]
model_names_list_JP = [dict_column_names_JP[key] for key in model_names_list]
# Date stamp embedded in the LaTeX label.
date = "2022年3月9日"

# Load the raw data, varying both the core count and the problem size.
rawDF = return_rawDF_with_init_param(
    benchmark_name=benchmark_name,
    classes=classes,
    processes=processes,
    csv_dir_path="./csv_files/",
)

# Explanatory variables: whatever columns remain once the identifier columns
# and the response column have been removed.
exp_var = rawDF.columns.tolist()
for element_be_removed in (
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
):
    exp_var.remove(element_be_removed)
res_var = ["functionCallNum"]
print(f"exp_var = {exp_var}")

# Build the per-function MAPE table for the selected models.
returned_MAPE_table = return_MAPE_Table_DF_from_rawDF(
    rawDF=rawDF,
    exp_var_list=exp_var,
    res_var_list=res_var,
    model_name_list=model_names_list,
)
# Japanese headings, function name as index, rows and columns sorted.
df = (
    returned_MAPE_table.rename(columns=dict_column_names_JP)
    .set_index("関数名")
    .sort_index(axis=0)
    .sort_index(axis=1)
)
# Presumably appends the best-model column seen in the output (最適モデル).
df = addLowestMAPEsModelNameColumn(df, version=2, model_name_list=model_names_list_JP)
print(
    df.style.to_latex(
        caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
        label=f"{date}allModelsMAPEin{benchmark_name.upper()}",
    )
)
exp_var = ['process', 'na', 'nonzer', 'niter', 'shift']
/usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could 
not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated
\begin{table}
\caption{ベンチマークプログラムCGにおける各モデルでのMAPE}
\label{2022年3月9日allModelsMAPEinCG}
\begin{tabular}{lrrrrrrrrrrrl}
& プロセス数を問題サイズで割る & 反比例 & 反比例&対数 & 反比例&線形 & 回帰木 & 対数 & 対数&反比例 & 対数&線形 & 線形 & 線形&反比例 & 線形&対数 & 最適モデル \\
関数名 & & & & & & & & & & & & \\
.TAU_application & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
ALLOC_SPACE & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
CG & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
CONJ_GRAD & 37.550135 & 13.876670 & 13.732936 & 14.498431 & 0.000000 & 14.804389 & 14.717362 & 15.248570 & 13.161174 & 12.412559 & 12.755412 & 回帰木 \\
ICNVRT & 159.614199 & 662.481358 & 2504.274677 & 894.574028 & 0.000000 & 2443.699386 & 3811.490854 & 815.789372 & 502.123230 & 6621.011052 & 2283.060136 & 回帰木 \\
INITIALIZE_MPI & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MAKEA & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Barrier() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Bcast() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Comm_rank() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Comm_size() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Finalize() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Init() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Irecv() & 208.955855 & 38.629896 & 38.552817 & 39.029932 & 0.000000 & 35.489902 & 35.487038 & 35.954193 & 34.989183 & 35.515528 & 35.660459 & 回帰木 \\
MPI_Reduce() & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
MPI_Send() & 208.955855 & 38.629896 & 38.552817 & 39.029932 & 0.000000 & 35.489902 & 35.487038 & 35.954193 & 34.989183 & 35.515528 & 35.660459 & 回帰木 \\
MPI_Wait() & 208.955855 & 38.629896 & 38.552817 & 39.029932 & 0.000000 & 35.489902 & 35.487038 & 35.954193 & 34.989183 & 35.515528 & 35.660459 & 回帰木 \\
SETUP_PROC_INFO & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SETUP_SUBMATRIX_INFO & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SPARSE & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 線形 \\
SPRNVC & 153.544935 & 336.648986 & 1321.895548 & 345.739861 & 0.000000 & 1306.978016 & 1503.097645 & 338.951990 & 198.005854 & 2573.964383 & 1221.597908 & 回帰木 \\
VECSET & 153.544935 & 336.648986 & 1321.895548 & 345.739861 & 0.000000 & 1306.978016 & 1503.097645 & 338.951990 & 198.005854 & 2573.964383 & 1221.597908 & 回帰木 \\
\end{tabular}
\end{table}
# Clear every user-defined name from the namespace without asking for confirmation.
%reset -f
# Import the shared library, which is kept in ipynb (notebook) form.
%run ./lib/lib.ipynb
# Directory that holds the CSV files containing the raw data.
csvDirPath = "./csv_files/"
# Short names of the benchmarks.
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
DEBUG:__main__:hello
# Both the problem size (classes A-F) and the process count are varied here;
# the analysis is restricted to the single function named in `functionName`.
benchmark_name: str = "cg"
classes: list[str] = ["A", "B", "C", "D", "E", "F"]
processes: list[int] = [2, 4, 8, 16, 32, 64, 128, 256]

# Internal column key -> Japanese heading; commented-out entries are models
# excluded from this comparison.
dict_column_names_JP: dict[str, str] = {
    "functionName": "関数名",
    "modelLin": "線形",
    "modelIp": "反比例",
    "modelLog": "対数",
    # "modelLinAndIp": "線形&反比例",
    # "modelLinAndLog": "線形&対数",
    # "modelIpAndLin": "反比例&線形",
    # "modelIpAndLog": "反比例&対数",
    # "modelLogAndLin": "対数&線形",
    # "modelLogAndIp": "対数&反比例",
    "modelProcessDividedByProblemSize": "プロセス数を問題サイズで割る",
    "modelProblemSizeDividedByProcess": "問題サイズをプロセス数で割る",
    # "modelBasicTree": "回帰木",
}
model_names_list_JP: list[str] = list(dict_column_names_JP.values())
model_names_list_JP.remove("関数名")
model_names_list: list[str] = list(dict_column_names_JP.keys())
model_names_list.remove("functionName")
date: str = "2022年3月30日"

# Load the raw data, varying both the core count and the problem size.
rawDF: pd.DataFrame = return_rawDF_with_init_param(
    benchmark_name=benchmark_name,
    classes=classes,
    processes=processes,
    csv_dir_path="./csv_files/",
)
functionName: str = "ICNVRT"
rawDF = rawDF[rawDF["functionName"] == functionName]

# Explanatory variables: whatever columns remain once the identifier columns
# and the response column have been removed.
exp_var: list[str] = rawDF.columns.tolist()
for element_be_removed in [
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
]:
    exp_var.remove(element_be_removed)
res_var: list[str] = ["functionCallNum"]
print(f"exp_var = {exp_var}")

returned_MAPE_table: pd.DataFrame = return_MAPE_Table_DF_from_rawDF(
    rawDF=rawDF,
    exp_var_list=exp_var,
    res_var_list=res_var,
    model_name_list=model_names_list,
)
df: pd.DataFrame = returned_MAPE_table.rename(columns=dict_column_names_JP)
df = df.set_index("関数名")
df = df.sort_index(axis=0)
df = df.sort_index(axis=1)
df = addLowestMAPEsModelNameColumn(df, version=2, model_name_list=model_names_list_JP)

# Styler.set_precision is deprecated (it emitted a FutureWarning in this
# notebook); Styler.format(precision=...) is the supported replacement.
# The caption also gains the particle that was missing from "における".
print(
    df.style.format(precision=2).to_latex(
        caption=f"ベンチマークプログラム{benchmark_name.upper()}の関数{functionName}における各モデルでのMAPE",
        label=f"{date}allModelsMAPEin{benchmark_name.upper()}",
    )
)
# Last expression of the cell: styled table with the per-row minimum highlighted.
df.style.format(precision=2).highlight_min(axis=1, subset=model_names_list_JP)
exp_var = ['process', 'na', 'nonzer', 'niter', 'shift']
\begin{table}
\caption{ベンチマークプログラムCGの関数ICNVRTおける各モデルでのMAPE}
\label{2022年3月30日allModelsMAPEinCG}
\begin{tabular}{lrrrrrl}
& プロセス数を問題サイズで割る & 反比例 & 問題サイズをプロセス数で割る & 対数 & 線形 & 最適モデル \\
関数名 & & & & & & \\
ICNVRT & 159.61 & 662.48 & 119.98 & 2443.70 & 502.12 & 問題サイズをプロセス数で割る \\
\end{tabular}
\end{table}
/usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /tmp/ipykernel_137/1608739983.py:70: FutureWarning: this method is deprecated in favour of `Styler.format(precision=..)`
| | プロセス数を問題サイズで割る | 反比例 | 問題サイズをプロセス数で割る | 対数 | 線形 | 最適モデル |
|---|---|---|---|---|---|---|
| 関数名 | ||||||
| ICNVRT | 159.61 | 662.48 | 119.98 | 2443.70 | 502.12 | 問題サイズをプロセス数で割る |
# Problem size held fixed (class C only); the process count is varied.
benchmark_name: str = "cg"
classes: list[str] = ["C"]
processes: list[int] = [2, 4, 8, 16, 32, 64, 128, 256]

# Internal column key -> Japanese heading; commented-out entries are models
# excluded from this comparison.
dict_column_names_JP: dict[str, str] = {
    "functionName": "関数名",
    "modelLin": "線形",
    "modelIp": "反比例",
    "modelLog": "対数",
    # "modelLinAndIp": "線形&反比例",
    # "modelLinAndLog": "線形&対数",
    # "modelIpAndLin": "反比例&線形",
    # "modelIpAndLog": "反比例&対数",
    # "modelLogAndLin": "対数&線形",
    # "modelLogAndIp": "対数&反比例",
    # "modelProcessDividedByProblemSize": "プロセス数を問題サイズで割る",
    # "modelProblemSizeDividedByProcess": "問題サイズをプロセス数で割る",
    # "modelBasicTree": "回帰木",
}
# Every key other than the function-name column identifies a model.
model_names_list: list[str] = [k for k in dict_column_names_JP if k != "functionName"]
model_names_list_JP: list[str] = [dict_column_names_JP[k] for k in model_names_list]
date: str = "2022年3月30日"

# Load the raw data for the single problem class across all process counts.
rawDF: pd.DataFrame = return_rawDF_with_init_param(
    benchmark_name=benchmark_name,
    classes=classes,
    processes=processes,
    csv_dir_path="./csv_files/",
)

# Explanatory variables: whatever columns remain once the identifier columns
# and the response column have been removed.
exp_var: list[str] = rawDF.columns.tolist()
for element_be_removed in (
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
):
    exp_var.remove(element_be_removed)
res_var: list[str] = ["functionCallNum"]
print(f"exp_var = {exp_var}")

# Build the per-function MAPE table for the selected models.
returned_MAPE_table: pd.DataFrame = return_MAPE_Table_DF_from_rawDF(
    rawDF=rawDF,
    exp_var_list=exp_var,
    res_var_list=res_var,
    model_name_list=model_names_list,
)
# Japanese headings, function name as index, rows and columns sorted.
df: pd.DataFrame = (
    returned_MAPE_table.rename(columns=dict_column_names_JP)
    .set_index("関数名")
    .sort_index(axis=0)
    .sort_index(axis=1)
)
df = addLowestMAPEsModelNameColumn(df, version=2, model_name_list=model_names_list_JP)
# LaTeX export is disabled for this cell.
# print(
#     df.style.set_precision(2).to_latex(
#         caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
#         label=f"{date}allModelsMAPEin{benchmark_name.upper()}",
#     )
# )
# Last expression of the cell: styled table with the per-row minimum highlighted.
df.style.format(precision=2).highlight_min(axis=1, subset=model_names_list_JP)
exp_var = ['process', 'na', 'nonzer', 'niter', 'shift']
| | 反比例 | 対数 | 線形 | 最適モデル |
|---|---|---|---|---|
| 関数名 | ||||
| .TAU_application | 0.00 | 0.00 | 0.00 | 線形 |
| ALLOC_SPACE | 0.00 | 0.00 | 0.00 | 線形 |
| CG | 0.00 | 0.00 | 0.00 | 線形 |
| CONJ_GRAD | 0.00 | 0.00 | 0.00 | 線形 |
| ICNVRT | 3.88 | 10.59 | 15.85 | 反比例 |
| INITIALIZE_MPI | 0.00 | 0.00 | 0.00 | 線形 |
| MAKEA | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Barrier() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Bcast() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Comm_rank() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Comm_size() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Finalize() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Init() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Irecv() | 21.86 | 9.45 | 14.84 | 対数 |
| MPI_Reduce() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Send() | 21.86 | 9.45 | 14.84 | 対数 |
| MPI_Wait() | 21.86 | 9.45 | 14.84 | 対数 |
| SETUP_PROC_INFO | 0.00 | 0.00 | 0.00 | 線形 |
| SETUP_SUBMATRIX_INFO | 0.00 | 0.00 | 0.00 | 線形 |
| SPARSE | 0.00 | 0.00 | 0.00 | 線形 |
| SPRNVC | 3.35 | 9.06 | 13.44 | 反比例 |
| VECSET | 3.35 | 9.06 | 13.44 | 反比例 |
# Process count held fixed (64 only); the problem size (classes A-F) is varied.
benchmark_name: str = "cg"
classes: list[str] = ["A", "B", "C", "D", "E", "F"]
processes: list[int] = [64]

# Internal column key -> Japanese heading; commented-out entries are models
# excluded from this comparison.
dict_column_names_JP: dict[str, str] = {
    "functionName": "関数名",
    "modelLin": "線形",
    "modelIp": "反比例",
    "modelLog": "対数",
    # "modelLinAndIp": "線形&反比例",
    # "modelLinAndLog": "線形&対数",
    # "modelIpAndLin": "反比例&線形",
    # "modelIpAndLog": "反比例&対数",
    # "modelLogAndLin": "対数&線形",
    # "modelLogAndIp": "対数&反比例",
    # "modelProcessDividedByProblemSize": "プロセス数を問題サイズで割る",
    # "modelProblemSizeDividedByProcess": "問題サイズをプロセス数で割る",
    # "modelBasicTree": "回帰木",
}
# Every key other than the function-name column identifies a model.
model_names_list: list[str] = [k for k in dict_column_names_JP if k != "functionName"]
model_names_list_JP: list[str] = [dict_column_names_JP[k] for k in model_names_list]
date: str = "2022年3月30日"

# Load the raw data for the single process count across all problem classes.
rawDF: pd.DataFrame = return_rawDF_with_init_param(
    benchmark_name=benchmark_name,
    classes=classes,
    processes=processes,
    csv_dir_path="./csv_files/",
)

# Explanatory variables: whatever columns remain once the identifier columns
# and the response column have been removed.
exp_var: list[str] = rawDF.columns.tolist()
for element_be_removed in (
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
):
    exp_var.remove(element_be_removed)
res_var: list[str] = ["functionCallNum"]
print(f"exp_var = {exp_var}")

# Build the per-function MAPE table for the selected models.
returned_MAPE_table: pd.DataFrame = return_MAPE_Table_DF_from_rawDF(
    rawDF=rawDF,
    exp_var_list=exp_var,
    res_var_list=res_var,
    model_name_list=model_names_list,
)
# Japanese headings, function name as index, rows and columns sorted.
df: pd.DataFrame = (
    returned_MAPE_table.rename(columns=dict_column_names_JP)
    .set_index("関数名")
    .sort_index(axis=0)
    .sort_index(axis=1)
)
df = addLowestMAPEsModelNameColumn(df, version=2, model_name_list=model_names_list_JP)
# LaTeX export is disabled for this cell.
# print(
#     df.style.set_precision(2).to_latex(
#         caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
#         label=f"{date}allModelsMAPEin{benchmark_name.upper()}",
#     )
# )
# Last expression of the cell: styled table with the per-row minimum highlighted.
df.style.format(precision=2).highlight_min(axis=1, subset=model_names_list_JP)
exp_var = ['process', 'na', 'nonzer', 'niter', 'shift']
| | 反比例 | 対数 | 線形 | 最適モデル |
|---|---|---|---|---|
| 関数名 | ||||
| .TAU_application | 0.00 | 0.00 | 0.00 | 線形 |
| ALLOC_SPACE | 0.00 | 0.00 | 0.00 | 線形 |
| CG | 0.00 | 0.00 | 0.00 | 線形 |
| CONJ_GRAD | 0.00 | 0.00 | 0.00 | 線形 |
| ICNVRT | 0.00 | 0.00 | 0.00 | 線形 |
| INITIALIZE_MPI | 0.00 | 0.00 | 0.00 | 線形 |
| MAKEA | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Barrier() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Bcast() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Comm_rank() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Comm_size() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Finalize() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Init() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Irecv() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Reduce() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Send() | 0.00 | 0.00 | 0.00 | 線形 |
| MPI_Wait() | 0.00 | 0.00 | 0.00 | 線形 |
| SETUP_PROC_INFO | 0.00 | 0.00 | 0.00 | 線形 |
| SETUP_SUBMATRIX_INFO | 0.00 | 0.00 | 0.00 | 線形 |
| SPARSE | 0.00 | 0.00 | 0.00 | 線形 |
| SPRNVC | 0.00 | 0.00 | 0.00 | 線形 |
| VECSET | 0.00 | 0.00 | 0.00 | 線形 |
# MAPE of every available model for benchmark CG, with both the problem size
# and the process count varied, emitted as a LaTeX table with two decimals.
benchmark_name: str = "cg"
classes: list[str] = ["A", "B", "C", "D", "E", "F"]
processes: list[int] = [2, 4, 8, 16, 32, 64, 128, 256]

# Internal column key -> Japanese heading shown in the published table.
dict_column_names_JP: dict[str, str] = {
    "functionName": "関数名",
    "modelLin": "線形",
    "modelIp": "反比例",
    "modelLog": "対数",
    "modelLinAndIp": "線形&反比例",
    "modelLinAndLog": "線形&対数",
    "modelIpAndLin": "反比例&線形",
    "modelIpAndLog": "反比例&対数",
    "modelLogAndLin": "対数&線形",
    "modelLogAndIp": "対数&反比例",
    "modelProcessDividedByProblemSize": "プロセス数を問題サイズで割る",
    "modelProblemSizeDividedByProcess": "問題サイズをプロセス数で割る",
    "modelBasicTree": "回帰木",
}
model_names_list_JP: list[str] = list(dict_column_names_JP.values())
model_names_list_JP.remove("関数名")
model_names_list: list[str] = list(dict_column_names_JP.keys())
model_names_list.remove("functionName")
date: str = "2022年3月23日"

# Load the raw data, varying both the core count and the problem size.
rawDF: pd.DataFrame = return_rawDF_with_init_param(
    benchmark_name=benchmark_name,
    classes=classes,
    processes=processes,
    csv_dir_path="./csv_files/",
)

# Explanatory variables: whatever columns remain once the identifier columns
# and the response column have been removed.
exp_var: list[str] = rawDF.columns.tolist()
for element_be_removed in [
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
]:
    exp_var.remove(element_be_removed)
res_var: list[str] = ["functionCallNum"]
print(f"exp_var = {exp_var}")

returned_MAPE_table: pd.DataFrame = return_MAPE_Table_DF_from_rawDF(
    rawDF=rawDF,
    exp_var_list=exp_var,
    res_var_list=res_var,
    model_name_list=model_names_list,
)
df: pd.DataFrame = returned_MAPE_table.rename(columns=dict_column_names_JP)
df = df.set_index("関数名")
df = df.sort_index(axis=0)
df = df.sort_index(axis=1)
df = addLowestMAPEsModelNameColumn(df, version=2, model_name_list=model_names_list_JP)

# Styler.set_precision is deprecated in favour of Styler.format(precision=...),
# which yields the same two-decimal rendering.
print(
    df.style.format(precision=2).to_latex(
        caption=f"ベンチマークプログラム{benchmark_name.upper()}における各モデルでのMAPE",
        label=f"{date}allModelsMAPEin{benchmark_name.upper()}",
    )
)
exp_var = ['process', 'na', 'nonzer', 'niter', 'shift']
/usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could 
not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the 
parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated
\begin{table}
\caption{ベンチマークプログラムCGにおける各モデルでのMAPE}
\label{2022年3月23日allModelsMAPEinCG}
\begin{tabular}{lrrrrrrrrrrrrl}
& プロセス数を問題サイズで割る & 反比例 & 反比例\&対数 & 反比例\&線形 & 問題サイズをプロセス数で割る & 回帰木 & 対数 & 対数\&反比例 & 対数\&線形 & 線形 & 線形\&反比例 & 線形\&対数 & 最適モデル \\
関数名 & & & & & & & & & & & & & \\
.TAU\_application & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
ALLOC\_SPACE & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
CG & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
CONJ\_GRAD & 37.55 & 13.88 & 13.73 & 14.50 & 42.76 & 0.00 & 14.80 & 14.72 & 15.25 & 13.16 & 12.41 & 12.76 & 回帰木 \\
ICNVRT & 159.61 & 662.48 & 2504.27 & 894.57 & 119.98 & 0.00 & 2443.70 & 3811.49 & 815.79 & 502.12 & 6621.01 & 2283.06 & 回帰木 \\
INITIALIZE\_MPI & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
MAKEA & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
MPI\_Barrier() & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
MPI\_Bcast() & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
MPI\_Comm\_rank() & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
MPI\_Comm\_size() & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
MPI\_Finalize() & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
MPI\_Init() & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
MPI\_Irecv() & 208.96 & 38.63 & 38.55 & 39.03 & 56.48 & 0.00 & 35.49 & 35.49 & 35.95 & 34.99 & 35.52 & 35.66 & 回帰木 \\
MPI\_Reduce() & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
MPI\_Send() & 208.96 & 38.63 & 38.55 & 39.03 & 56.48 & 0.00 & 35.49 & 35.49 & 35.95 & 34.99 & 35.52 & 35.66 & 回帰木 \\
MPI\_Wait() & 208.96 & 38.63 & 38.55 & 39.03 & 56.48 & 0.00 & 35.49 & 35.49 & 35.95 & 34.99 & 35.52 & 35.66 & 回帰木 \\
SETUP\_PROC\_INFO & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
SETUP\_SUBMATRIX\_INFO & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
SPARSE & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 線形 \\
SPRNVC & 153.54 & 336.65 & 1321.90 & 345.74 & 182.72 & 0.00 & 1306.98 & 1503.10 & 338.95 & 198.01 & 2573.96 & 1221.60 & 回帰木 \\
VECSET & 153.54 & 336.65 & 1321.90 & 345.74 & 182.72 & 0.00 & 1306.98 & 1503.10 & 338.95 & 198.01 & 2573.96 & 1221.60 & 回帰木 \\
\end{tabular}
\end{table}
/usr/local/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated /tmp/ipykernel_137/3542956666.py:66: FutureWarning: this method is deprecated in favour of `Styler.format(precision=..)`